1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl_xattr.h Extended attribute system call representation of Access Control Lists. Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> */ #ifndef _POSIX_ACL_XATTR_H #define _POSIX_ACL_XATTR_H #include <uapi/linux/xattr.h> #include <uapi/linux/posix_acl_xattr.h> #include <linux/posix_acl.h> static inline size_t posix_acl_xattr_size(int count) { return (sizeof(struct posix_acl_xattr_header) + (count * sizeof(struct posix_acl_xattr_entry))); } static inline int posix_acl_xattr_count(size_t size) { if (size < sizeof(struct posix_acl_xattr_header)) return -1; size -= sizeof(struct posix_acl_xattr_header); if (size % sizeof(struct posix_acl_xattr_entry)) return -1; return size / sizeof(struct posix_acl_xattr_entry); } #ifdef CONFIG_FS_POSIX_ACL void posix_acl_fix_xattr_from_user(void *value, size_t size); void posix_acl_fix_xattr_to_user(void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { } static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) { } #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; #endif /* _POSIX_ACL_XATTR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H #define _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H #ifndef __LITTLE_ENDIAN #define __LITTLE_ENDIAN 1234 #endif #ifndef __LITTLE_ENDIAN_BITFIELD #define __LITTLE_ENDIAN_BITFIELD #endif #include <linux/types.h> #include <linux/swab.h> #define __constant_htonl(x) ((__force __be32)___constant_swab32((x))) #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x)) #define __constant_htons(x) ((__force __be16)___constant_swab16((x))) #define __constant_ntohs(x) ___constant_swab16((__force __be16)(x)) #define __constant_cpu_to_le64(x) ((__force __le64)(__u64)(x)) #define __constant_le64_to_cpu(x) ((__force __u64)(__le64)(x)) #define __constant_cpu_to_le32(x) ((__force __le32)(__u32)(x)) #define __constant_le32_to_cpu(x) ((__force __u32)(__le32)(x)) #define __constant_cpu_to_le16(x) ((__force __le16)(__u16)(x)) #define __constant_le16_to_cpu(x) ((__force __u16)(__le16)(x)) #define __constant_cpu_to_be64(x) ((__force __be64)___constant_swab64((x))) #define __constant_be64_to_cpu(x) ___constant_swab64((__force __u64)(__be64)(x)) #define __constant_cpu_to_be32(x) ((__force __be32)___constant_swab32((x))) #define __constant_be32_to_cpu(x) ___constant_swab32((__force __u32)(__be32)(x)) #define __constant_cpu_to_be16(x) ((__force __be16)___constant_swab16((x))) #define __constant_be16_to_cpu(x) ___constant_swab16((__force __u16)(__be16)(x)) #define __cpu_to_le64(x) ((__force __le64)(__u64)(x)) #define __le64_to_cpu(x) ((__force __u64)(__le64)(x)) #define __cpu_to_le32(x) ((__force __le32)(__u32)(x)) #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) #define __cpu_to_le16(x) ((__force __le16)(__u16)(x)) #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) #define __cpu_to_be64(x) ((__force __be64)__swab64((x))) #define __be64_to_cpu(x) __swab64((__force __u64)(__be64)(x)) #define __cpu_to_be32(x) ((__force __be32)__swab32((x))) #define __be32_to_cpu(x) __swab32((__force __u32)(__be32)(x)) #define __cpu_to_be16(x) ((__force __be16)__swab16((x))) #define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x)) static __always_inline __le64 __cpu_to_le64p(const __u64 *p) { return (__force __le64)*p; } static __always_inline __u64 __le64_to_cpup(const __le64 *p) { return (__force __u64)*p; } static __always_inline __le32 __cpu_to_le32p(const __u32 *p) { return (__force __le32)*p; } static __always_inline __u32 __le32_to_cpup(const __le32 *p) { return (__force __u32)*p; } static __always_inline __le16 __cpu_to_le16p(const __u16 *p) { return (__force __le16)*p; } static __always_inline __u16 __le16_to_cpup(const __le16 *p) { return (__force __u16)*p; } static __always_inline __be64 __cpu_to_be64p(const __u64 *p) { return (__force __be64)__swab64p(p); } static __always_inline __u64 __be64_to_cpup(const __be64 *p) { return __swab64p((__u64 *)p); } static __always_inline __be32 __cpu_to_be32p(const __u32 *p) { return (__force __be32)__swab32p(p); } static __always_inline __u32 __be32_to_cpup(const __be32 *p) { return __swab32p((__u32 *)p); } static __always_inline __be16 __cpu_to_be16p(const __u16 *p) { return (__force __be16)__swab16p(p); } static __always_inline __u16 __be16_to_cpup(const __be16 *p) { return __swab16p((__u16 *)p); } #define __cpu_to_le64s(x) do { (void)(x); } while (0) #define __le64_to_cpus(x) do { (void)(x); } while (0) #define __cpu_to_le32s(x) do { (void)(x); } while (0) #define __le32_to_cpus(x) do { (void)(x); } while (0) #define __cpu_to_le16s(x) do { (void)(x); } while (0) #define __le16_to_cpus(x) do { (void)(x); } while (0) #define __cpu_to_be64s(x) __swab64s((x)) #define __be64_to_cpus(x) __swab64s((x)) #define __cpu_to_be32s(x) __swab32s((x)) #define __be32_to_cpus(x) __swab32s((x)) #define __cpu_to_be16s(x) __swab16s((x)) #define __be16_to_cpus(x) __swab16s((x)) #endif /* _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 /* * DRBG based on NIST SP800-90A * * Copyright Stephan Mueller <smueller@chronox.de>, 2014 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #ifndef _DRBG_H #define _DRBG_H #include <linux/random.h> #include <linux/scatterlist.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/slab.h> #include <crypto/internal/rng.h> #include <crypto/rng.h> #include <linux/fips.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/workqueue.h> /* * Concatenation Helper and string operation helper * * SP800-90A requires the concatenation of different data. To avoid copying * buffers around or allocate additional memory, the following data structure * is used to point to the original memory with its size. In addition, it * is used to build a linked list. The linked list defines the concatenation * of individual buffers. The order of memory block referenced in that * linked list determines the order of concatenation. */ struct drbg_string { const unsigned char *buf; size_t len; struct list_head list; }; static inline void drbg_string_fill(struct drbg_string *string, const unsigned char *buf, size_t len) { string->buf = buf; string->len = len; INIT_LIST_HEAD(&string->list); } struct drbg_state; typedef uint32_t drbg_flag_t; struct drbg_core { drbg_flag_t flags; /* flags for the cipher */ __u8 statelen; /* maximum state length */ __u8 blocklen_bytes; /* block size of output in bytes */ char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */ /* kernel crypto API backend cipher name */ char backend_cra_name[CRYPTO_MAX_ALG_NAME]; }; struct drbg_state_ops { int (*update)(struct drbg_state *drbg, struct list_head *seed, int reseed); int (*generate)(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl); int (*crypto_init)(struct drbg_state *drbg); int (*crypto_fini)(struct drbg_state *drbg); }; struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ unsigned char *Vbuf; /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; unsigned char *Cbuf; /* Number of RNG requests since last reseed -- 10.1.1.1 1c) */ size_t reseed_ctr; size_t reseed_threshold; /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; unsigned char *scratchpadbuf; void *priv_data; /* Cipher handle */ struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->statelen; return 0; } static inline __u8 drbg_blocklen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->blocklen_bytes; return 0; } static inline __u8 drbg_keylen(struct drbg_state *drbg) { if (drbg && drbg->core) return (drbg->core->statelen - drbg->core->blocklen_bytes); return 0; } static inline size_t drbg_max_request_bytes(struct drbg_state *drbg) { /* SP800-90A requires the limit 2**19 bits, but we return bytes */ return (1 << 16); } static inline size_t drbg_max_addtl(struct drbg_state *drbg) { /* SP800-90A requires 2**35 bytes additional info str / pers str */ #if (__BITS_PER_LONG == 32) /* * SP800-90A allows smaller maximum numbers to be returned -- we * return SIZE_MAX - 1 to allow the verification of the enforcement * of this value in drbg_healthcheck_sanity. */ return (SIZE_MAX - 1); #else return (1UL<<35); #endif } static inline size_t drbg_max_requests(struct drbg_state *drbg) { /* SP800-90A requires 2**48 maximum requests before reseeding */ return (1<<20); } /* * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data. * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl) { return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data and * allow furnishing of test_data * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_reset() to allow the caller to provide test_data * * @drng DRBG handle -- see crypto_rng_reset * @pers personalization string input buffer * @perslen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_reset */ static inline int crypto_drbg_reset_test(struct crypto_rng *drng, struct drbg_string *pers, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_reset(drng, pers->buf, pers->len); } /* DRBG type flags */ #define DRBG_CTR ((drbg_flag_t)1<<0) #define DRBG_HMAC ((drbg_flag_t)1<<1) #define DRBG_HASH ((drbg_flag_t)1<<2) #define DRBG_TYPE_MASK (DRBG_CTR | DRBG_HMAC | DRBG_HASH) /* DRBG strength flags */ #define DRBG_STRENGTH128 ((drbg_flag_t)1<<3) #define DRBG_STRENGTH192 ((drbg_flag_t)1<<4) #define DRBG_STRENGTH256 ((drbg_flag_t)1<<5) #define DRBG_STRENGTH_MASK (DRBG_STRENGTH128 | DRBG_STRENGTH192 | \ DRBG_STRENGTH256) enum drbg_prefixes { DRBG_PREFIX0 = 0x00, DRBG_PREFIX1, DRBG_PREFIX2, DRBG_PREFIX3 }; #endif /* _DRBG_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_ERR_H #define _LINUX_ERR_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/errno.h> /* * Kernel pointers have redundant information, so we can use a * scheme where we can return either an error code or a normal * pointer with the same return value. * * This should be a per-architecture thing, to allow different * error and pointer decisions. */ #define MAX_ERRNO 4095 #ifndef __ASSEMBLY__ #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) { return (void *) error; } static inline long __must_check PTR_ERR(__force const void *ptr) { return (long) ptr; } static inline bool __must_check IS_ERR(__force const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) { return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); } /** * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type * @ptr: The pointer to cast. * * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ static inline void * __must_check ERR_CAST(__force const void *ptr) { /* cast away the const */ return (void *) ptr; } static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) { if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; } #endif #endif /* _LINUX_ERR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <linux/seq_file.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * Lock subclasses for tty locks * * TTY_LOCK_NORMAL is for normal ttys and master ptys. * TTY_LOCK_SLAVE is for slave ptys only. * * Lock subclasses are necessary for handling nested locking with pty pairs. * tty locks which use nested locking: * * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. * The stable lock order is master pty first, then slave pty. * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. * Subclassing this lock enables the slave pty to hold its * termios_rwsem when claiming the master tty_buffer lock. * tty_buffer lock - slave ptys can claim nested buffer lock when handling * signal chars. The stable lock order is slave pty, then * master. */ enum { TTY_LOCK_NORMAL = 0, TTY_LOCK_SLAVE, }; /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) */ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' struct tty_buffer { union { struct tty_buffer *next; struct llist_node free; }; int used; int size; int commit; int read; int flags; /* Data points here */ unsigned long data[]; }; /* Values for .flags field of tty_buffer */ #define TTYB_NORMAL 1 /* buffer has no flags buffer */ static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) { return ((unsigned char *)b->data) + ofs; } static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) { return (char *)char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ atomic_t mem_used; /* In-use buffers excluding free list */ int mem_limit; struct tty_buffer *tail; /* Active buffer */ }; /* * When a break, frame error, or parity error happens, these codes are * stuffed into the flags buffer. */ #define TTY_NORMAL 0 #define TTY_BREAK 1 #define TTY_FRAME 2 #define TTY_PARITY 3 #define TTY_OVERRUN 4 #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) #define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) _L_FLAG((tty), ECHOE) #define L_ECHOK(tty) _L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; /* * Port level information. Each device keeps its own port level information * so provide a common structure for those ports wanting to use common support * routines. * * The tty port has a different lifetime to the tty so must be kept apart. * In addition be careful as tty -> port mappings are valid for the life * of the tty object but in many cases port -> tty mappings are valid only * until a hangup so don't use the wrong path. */ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); /* Control the DTR line */ void (*dtr_rts)(struct tty_port *port, int raise); /* Called when the last close completes or a hangup finishes IFF the port was initialized. Do not use to free resources. Called under the port mutex to serialize against activate/shutdowns */ void (*shutdown)(struct tty_port *port); /* Called under the port mutex from tty_port_open, serialized using the port mutex */ /* FIXME: long term getting the tty argument *out* of this would be good for consoles */ int (*activate)(struct tty_port *port, struct tty_struct *tty); /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; struct tty_port_client_operations { int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); void (*write_wakeup)(struct tty_port *port); }; extern const struct tty_port_client_operations tty_port_default_client_ops; struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ struct tty_struct *itty; /* internal back ptr */ const struct tty_port_operations *ops; /* Port operations */ const struct tty_port_client_operations *client_ops; /* Port client operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ unsigned long flags; /* User TTY flags ASYNC_ */ unsigned long iflags; /* Internal flags TTY_PORT_ */ unsigned char console:1, /* port is a console */ low_latency:1; /* optional: tune for latency */ struct mutex mutex; /* Locking */ struct mutex buf_mutex; /* Buffer alloc lock */ unsigned char *xmit_buf; /* Optional buffer */ unsigned int close_delay; /* Close port delay */ unsigned int closing_wait; /* Delay for output */ int drain_delay; /* Set to zero if no pure time based drain is needed else set to size of fifo */ struct kref kref; /* Ref counter */ void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ #define TTY_PORT_INITIALIZED 0 /* device is initialized */ #define TTY_PORT_SUSPENDED 1 /* device is suspended */ #define TTY_PORT_ACTIVE 2 /* device is open */ /* * uart drivers: use the uart_port::status field and the UPSTAT_* defines * for s/w-based flow control steering and carrier detection status */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ #define TTY_PORT_KOPENED 5 /* device exclusively opened by kernel */ /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty * has been closed --- for things like the baud rate, etc --- it is * not stored here, but rather a pointer to the real state is stored * here. Possible the winsize structure should have the same * treatment, but (1) the default 80x24 is usually right and (2) it's * most often used by a windowing system, which will set the correct * size each time the window is created or resized anyway. * - TYT, 9/14/92 */ struct tty_operations; struct tty_struct { int magic; struct kref kref; struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; /* Protects ldisc changes: Lock tty not pty */ struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; spinlock_t ctrl_lock; spinlock_t flow_lock; /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ /* * Writes protected by both ctrl lock and legacy mutex, readers must use * at least one of them. */ struct pid *session; unsigned long flags; int count; struct winsize winsize; /* winsize_mutex */ unsigned long stopped:1, /* flow_lock */ flow_stopped:1, unused:BITS_PER_LONG - 2; int hw_stopped; unsigned long ctrl_status:8, /* ctrl_lock */ packet:1, unused_ctrl:BITS_PER_LONG - 9; unsigned int receive_room; /* Bytes free for queue */ int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; /* protects tty_files list */ struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 int closing; unsigned char *write_buf; int write_cnt; /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct file *file; struct list_head list; }; /* tty magic number */ #define TTY_MAGIC 0x5401 /* * These bits are used in the flags field of the tty structure. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. */ #define TTY_THROTTLED 0 /* Call unthrottle() at threshold min */ #define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ #define TTY_THROTTLE_SAFE 1 #define TTY_UNTHROTTLE_SAFE 2 static inline void __tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; } static inline void tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; smp_mb(); } static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY extern void tty_kref_put(struct tty_struct *tty); extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern void tty_vhangup_self(void); extern void disassociate_ctty(int priv); extern dev_t tty_devnum(struct tty_struct *tty); extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; extern int vcs_init(void); extern struct class *tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Return a new reference to a tty object. The caller must hold * sufficient locks/counts to ensure that their existing reference cannot * go away */ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int __tty_check_change(struct tty_struct *tty, int sig); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); extern int tty_unregister_driver(struct tty_driver *driver); extern struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); extern struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_unregister_device(struct tty_driver *driver, unsigned index); extern void tty_write_message(struct tty_struct *tty, char *msg); extern int tty_send_xchar(struct tty_struct *tty, char ch); extern int tty_put_char(struct tty_struct *tty, unsigned char c); extern int tty_chars_in_buffer(struct tty_struct *tty); extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_throttle_safe(struct tty_struct *tty); extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); extern void session_clear_tty(struct pid *session); extern void no_tty(void); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); extern bool tty_buffer_restart_work(struct tty_port *port); extern bool tty_buffer_cancel_work(struct tty_port *port); extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns the baud rate as an integer for this terminal. The * termios lock must be held by the caller and the terminal bit * flags may be updated. * * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; #define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) extern void tty_port_init(struct tty_port *port); extern void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); extern void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port && kref_get_unless_zero(&port->kref)) return port; return NULL; } /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CTS_FLOW, &port->iflags); else clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline bool tty_port_active(struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline void tty_port_set_active(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_ACTIVE, &port->iflags); else clear_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline bool tty_port_check_carrier(struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CHECK_CD, &port->iflags); else clear_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline bool tty_port_suspended(struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline void tty_port_set_suspended(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_SUSPENDED, &port->iflags); else clear_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline bool tty_port_initialized(struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline void tty_port_set_initialized(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_INITIALIZED, &port->iflags); else clear_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline bool tty_port_kopened(struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } static inline void tty_port_set_kopened(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_KOPENED, &port->iflags); else clear_bit(TTY_PORT_KOPENED, &port->iflags); } extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); extern void tty_port_tty_wakeup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY extern void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern int tty_audit_push(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) { } static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); /* vt.c */ extern int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); extern long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ extern void tty_lock(struct tty_struct *tty); extern int tty_lock_interruptible(struct tty_struct *tty); extern void tty_unlock(struct tty_struct *tty); extern void tty_lock_slave(struct tty_struct *tty); extern void tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); extern void proc_tty_unregister_driver(struct tty_driver *); #else static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif #define tty_msg(fn, tty, f, ...) \ fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) #define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) #define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) #define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) #define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) #define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) #define tty_info_ratelimited(tty, f, ...) \ tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi.h - ACPI Interface * * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H #include <linux/errno.h> #include <linux/ioport.h> /* for struct resource */ #include <linux/irqdomain.h> #include <linux/resource_ext.h> #include <linux/device.h> #include <linux/property.h> #include <linux/uuid.h> #ifndef _LINUX #define _LINUX #endif #include <acpi/acpi.h> #ifdef CONFIG_ACPI #include <linux/list.h> #include <linux/mod_devicetable.h> #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> #include <acpi/acpi_numa.h> #include <acpi/acpi_io.h> #include <asm/acpi.h> static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; } #define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) #define ACPI_HANDLE_FWNODE(fwnode) \ acpi_device_handle(to_acpi_device_node(fwnode)) static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; fwnode = kzalloc(sizeof(struct fwnode_handle), GFP_KERNEL); if (!fwnode) return NULL; fwnode->ops = &acpi_static_fwnode_ops; return fwnode; } static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode) { if (WARN_ON(!is_acpi_static_node(fwnode))) return; kfree(fwnode); } /** * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with * the PCI-defined class-code information * * @_cls : the class, subclass, prog-if triple for this device * @_msk : the class mask for this device * * This macro is used to create a struct acpi_device_id that matches a * specific PCI class. The .id and .driver_data fields will be left * initialized with the default value. */ #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), static inline bool has_acpi_companion(struct device *dev) { return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) { return dev_name(&adev->dev); } struct device *acpi_get_first_physical_node(struct acpi_device *adev); enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, ACPI_IRQ_MODEL_IOAPIC, ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_COUNT }; extern enum acpi_irq_model_id acpi_irq_model; enum acpi_interrupt_id { ACPI_INTERRUPT_PMI = 1, ACPI_INTERRUPT_INIT, ACPI_INTERRUPT_CPEI, ACPI_INTERRUPT_COUNT }; #define ACPI_SPACE_MEM 0 enum acpi_address_range_id { ACPI_ADDRESS_RANGE_MEMORY = 1, ACPI_ADDRESS_RANGE_RESERVED = 2, ACPI_ADDRESS_RANGE_ACPI = 3, ACPI_ADDRESS_RANGE_NVS = 4, ACPI_ADDRESS_RANGE_COUNT }; /* Table Handlers */ union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; }; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); /* Debugger support */ struct acpi_debugger_ops { int (*create_thread)(acpi_osd_exec_callback function, void *context); ssize_t (*write_log)(const char *msg); ssize_t (*read_cmd)(char *buffer, size_t length); int (*wait_command_ready)(bool single_step, char *buffer, size_t length); int (*notify_command_complete)(void); }; struct acpi_debugger { const struct acpi_debugger_ops *ops; struct module *owner; struct mutex lock; }; #ifdef CONFIG_ACPI_DEBUGGER int __init acpi_debugger_init(void); int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops); void acpi_unregister_debugger(const struct acpi_debugger_ops *ops); int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context); ssize_t acpi_debugger_write_log(const char *msg); ssize_t acpi_debugger_read_cmd(char *buffer, size_t buffer_length); int acpi_debugger_wait_command_ready(void); int acpi_debugger_notify_command_complete(void); #else static inline int acpi_debugger_init(void) { return -ENODEV; } static inline int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops) { return -ENODEV; } static inline void acpi_unregister_debugger(const struct acpi_debugger_ops *ops) { } static inline int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context) { return -ENODEV; } static inline int acpi_debugger_write_log(const char *msg) { return -ENODEV; } static inline int acpi_debugger_read_cmd(char *buffer, u32 buffer_length) { return -ENODEV; } static inline int acpi_debugger_wait_command_ready(void) { return -ENODEV; } static inline int acpi_debugger_notify_command_complete(void) { return -ENODEV; } #endif #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) struct acpi_subtable_proc { int id; acpi_tbl_entry_handler handler; int count; }; void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); void acpi_boot_table_prepare (void); void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); #if defined(CONFIG_X86) || defined(CONFIG_IA64) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } #endif void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) #endif static inline bool invalid_logical_cpuid(u32 cpuid) { return (int)cpuid < 0; } static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) { return phys_id == PHYS_CPUID_INVALID; } /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ struct acpi_processor_power; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE bool acpi_processor_claim_cst_control(void); int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info); #else static inline bool acpi_processor_claim_cst_control(void) { return false; } static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { return acpi_sci_irq != INVALID_ACPI_IRQ; } extern int sbf_port; extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *fwnode); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else static inline int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) { return -1; } #endif /* * This function undoes the effect of one call to acpi_register_gsi(). * If this matches the last registration, any IRQ resources for gsi * are freed. */ void acpi_unregister_gsi (u32 gsi); struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); #ifdef CONFIG_PCI void acpi_penalize_sci_irq(int irq, int trigger, int polarity); #else static inline void acpi_penalize_sci_irq(int irq, int trigger, int polarity) { } #endif void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, u8 *rdata, unsigned rdata_len); extern acpi_handle ec_get_handle(void); extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) typedef void (*wmi_notify_handler) (u32 value, void *context); extern acpi_status wmi_evaluate_method(const char *guid, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out); extern acpi_status wmi_query_block(const char *guid, u8 instance, struct acpi_buffer *out); extern acpi_status wmi_set_block(const char *guid, u8 instance, const struct acpi_buffer *in); extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); #endif /* CONFIG_ACPI_WMI */ #define ACPI_VIDEO_OUTPUT_SWITCHING 0x0001 #define ACPI_VIDEO_DEVICE_POSTING 0x0002 #define ACPI_VIDEO_ROM_AVAILABLE 0x0004 #define ACPI_VIDEO_BACKLIGHT 0x0008 #define ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR 0x0010 #define ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO 0x0020 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR 0x0040 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO 0x0080 #define ACPI_VIDEO_BACKLIGHT_DMI_VENDOR 0x0100 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); /** * pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * * This is similar to pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with * offline nodes. A node may be offline when SRAT memory entry does not exist, * or NUMA is disabled, ex. "numa=off" on x86. */ static inline int pxm_to_online_node(int pxm) { int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } #else static inline int pxm_to_online_node(int pxm) { return 0; } static inline int acpi_map_pxm_to_node(int pxm) { return 0; } static inline int acpi_get_node(acpi_handle handle) { return 0; } #endif extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares, void *arg) { return acpi_dev_filter_resource_type(ares, (unsigned long)arg); } struct acpi_device *acpi_resource_consumer(struct resource *res); int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, u32 level); int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif #ifdef CONFIG_PM_SLEEP void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ int acpi_register_wakeup_handler( int wake_irq, bool (*wakeup)(void *context), void *context); void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context); struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; struct acpi_buffer cap; /* list of DWORD capabilities */ struct acpi_buffer ret; /* free by caller if success */ }; acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ #define OSC_REQUEST_ERROR 0x00000002 /* return */ #define OSC_INVALID_UUID_ERROR 0x00000004 /* return */ #define OSC_INVALID_REVISION_ERROR 0x00000008 /* return */ #define OSC_CAPABILITIES_MASK_ERROR 0x00000010 /* return */ /* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */ #define OSC_SB_PAD_SUPPORT 0x00000001 #define OSC_SB_PPC_OST_SUPPORT 0x00000002 #define OSC_SB_PR3_SUPPORT 0x00000004 #define OSC_SB_HOTPLUG_OST_SUPPORT 0x00000008 #define OSC_SB_APEI_SUPPORT 0x00000010 #define OSC_SB_CPC_SUPPORT 0x00000020 #define OSC_SB_CPCV2_SUPPORT 0x00000040 #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; /* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */ #define OSC_PCI_EXT_CONFIG_SUPPORT 0x00000001 #define OSC_PCI_ASPM_SUPPORT 0x00000002 #define OSC_PCI_CLOCK_PM_SUPPORT 0x00000004 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 0x00000008 #define OSC_PCI_MSI_SUPPORT 0x00000010 #define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 #define OSC_PCI_SUPPORT_MASKS 0x0000019f /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 #define OSC_PCI_SHPC_NATIVE_HP_CONTROL 0x00000002 #define OSC_PCI_EXPRESS_PME_CONTROL 0x00000004 #define OSC_PCI_EXPRESS_AER_CONTROL 0x00000008 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 #define OSC_PCI_CONTROL_MASKS 0x000000bf #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 #define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 #define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A #define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B #define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C #define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D #define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); /* Enable _OST when all relevant hotplug operations are enabled */ #if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST #endif /* _OST Source Event Code (OSPM Action) */ #define ACPI_OST_EC_OSPM_SHUTDOWN 0x100 #define ACPI_OST_EC_OSPM_EJECT 0x103 #define ACPI_OST_EC_OSPM_INSERTION 0x200 /* _OST General Processing Status Code */ #define ACPI_OST_SC_SUCCESS 0x0 #define ACPI_OST_SC_NON_SPECIFIC_FAILURE 0x1 #define ACPI_OST_SC_UNRECOGNIZED_NOTIFY 0x2 /* _OST OS Shutdown Processing (0x100) Status Code */ #define ACPI_OST_SC_OS_SHUTDOWN_DENIED 0x80 #define ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS 0x81 #define ACPI_OST_SC_OS_SHUTDOWN_COMPLETED 0x82 #define ACPI_OST_SC_OS_SHUTDOWN_NOT_SUPPORTED 0x83 /* _OST Ejection Request (0x3, 0x103) Status Code */ #define ACPI_OST_SC_EJECT_NOT_SUPPORTED 0x80 #define ACPI_OST_SC_DEVICE_IN_USE 0x81 #define ACPI_OST_SC_DEVICE_BUSY 0x82 #define ACPI_OST_SC_EJECT_DEPENDENCY_BUSY 0x83 #define ACPI_OST_SC_EJECT_IN_PROGRESS 0x84 /* _OST Insertion Request (0x200) Status Code */ #define ACPI_OST_SC_INSERT_IN_PROGRESS 0x80 #define ACPI_OST_SC_DRIVER_LOAD_FAILURE 0x81 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 enum acpi_predicate { all_versions, less_than_or_equal, equal, greater_than_or_equal, }; /* Table must be terminted by a NULL entry */ struct acpi_platform_list { char oem_id[ACPI_OEM_ID_SIZE+1]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE+1]; u32 oem_revision; char *table; enum acpi_predicate pred; char *reason; u32 data; }; int acpi_match_platform_list(const struct acpi_platform_list *plat); extern void acpi_early_init(void); extern void acpi_subsystem_init(void); extern void arch_post_acpi_subsys_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev); const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); void acpi_walk_dep_device_list(acpi_handle handle); struct platform_device *acpi_create_platform_device(struct acpi_device *, struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { adev->flags.visited = true; } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { adev->flags.visited = false; } enum acpi_reconfig_event { ACPI_RECONFIG_DEVICE_ADD = 0, ACPI_RECONFIG_DEVICE_REMOVE, }; int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); #ifdef CONFIG_ACPI_GTDT int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER static inline void acpi_arch_set_root_pointer(u64 addr) { } #endif #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER static inline u64 acpi_arch_get_root_pointer(void) { return 0; } #endif #else /* !CONFIG_ACPI */ #define acpi_disabled 1 #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), #include <acpi/acpi_numa.h> struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) { return false; } static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) { return false; } struct acpi_device; static inline bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) { return false; } static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { return NULL; } static inline void acpi_dev_put(struct acpi_device *adev) {} static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { return false; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) { return NULL; } static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) { return false; } static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) { return NULL; } static inline bool acpi_data_node_match(struct fwnode_handle *fwnode, const char *name) { return false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return NULL; } static inline bool has_acpi_companion(struct device *dev) { return false; } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { } static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; } static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev) { return NULL; } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { return 0; } static inline int acpi_boot_init(void) { return 0; } static inline void acpi_boot_table_prepare(void) { } static inline void acpi_boot_table_init(void) { } static inline int acpi_mps_check(void) { return 0; } static inline int acpi_check_resource_conflict(struct resource *res) { return 0; } static inline int acpi_check_region(resource_size_t start, resource_size_t n, const char *name) { return 0; } struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) { return -ENODEV; } static inline int acpi_nvs_register(__u64 start, __u64 size) { return 0; } static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data) { return 0; } struct acpi_device_id; static inline const struct acpi_device_id *acpi_match_device( const struct acpi_device_id *ids, const struct device *dev) { return NULL; } static inline const void *acpi_device_get_match_data(const struct device *dev) { return NULL; } static inline bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { return false; } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { return NULL; } static inline int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; } static inline int acpi_device_modalias(struct device *dev, char *buf, int size) { return -ENODEV; } static inline struct platform_device * acpi_create_platform_device(struct acpi_device *adev, struct property_entry *properties) { return NULL; } static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; } static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) { return DEV_DMA_NOT_SUPPORTED; } static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size) { return -ENODEV; } static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return 0; } static inline int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { return 0; } #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { } static inline int acpi_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline struct acpi_device *acpi_resource_consumer(struct resource *res) { return NULL; } static inline int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), void *context) { return -ENXIO; } static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_ioapic_add(acpi_handle root); #else static inline int acpi_ioapic_add(acpi_handle root) { return 0; } #endif #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, u32 val_a, u32 val_b)); acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); #ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, size_t size) { } #endif /* CONFIG_X86 */ #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM) int acpi_dev_suspend(struct device *dev, bool wakeup); int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); #else static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { return 0; } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); #else /* !CONFIG_ACPI */ static inline __printf(3, 4) void acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} #endif /* !CONFIG_ACPI */ #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) __printf(3, 4) void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...); #endif /* * acpi_handle_<level>: Print message with ACPI prefix and object path * * These interfaces acquire the global namespace mutex to obtain an object * path. In interrupt context, it shows the object path as <n/a>. */ #define acpi_handle_emerg(handle, fmt, ...) \ acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__) #define acpi_handle_alert(handle, fmt, ...) \ acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_crit(handle, fmt, ...) \ acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_err(handle, fmt, ...) \ acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__) #define acpi_handle_warn(handle, fmt, ...) \ acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__) #define acpi_handle_notice(handle, fmt, ...) \ acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__) #define acpi_handle_info(handle, fmt, ...) \ acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__) #if defined(DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__) #else #if defined(CONFIG_DYNAMIC_DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ _dynamic_func_call(fmt, __acpi_handle_debug, \ handle, pr_fmt(fmt), ##__VA_ARGS__) #else #define acpi_handle_debug(handle, fmt, ...) \ ({ \ if (0) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \ 0; \ }) #endif #endif #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { return -ENXIO; } #endif static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return acpi_dev_gpio_irq_get_by(adev, NULL, index); } /* Device properties */ #ifdef CONFIG_ACPI int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args); static inline int acpi_node_get_property_reference( const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, name, index, NR_FWNODE_REFERENCE_ARGS, args); } static inline bool acpi_dev_has_props(const struct acpi_device *adev) { return !list_empty(&adev->data.properties); } struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, const union acpi_object *properties); int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val); int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); #define ACPI_TABLE_ID_LEN 5 /** * struct acpi_probe_entry - boot-time probing entry * @id: ACPI table name * @type: Optional subtable type to match * (if @id contains subtables) * @subtable_valid: Optional callback to check the validity of * the subtable * @probe_table: Callback to the driver being probed when table * match is successful * @probe_subtbl: Callback to the driver being probed when table and * subtable match (and optional callback is successful) * @driver_data: Sideband data provided back to the driver */ struct acpi_probe_entry { __u8 id[ACPI_TABLE_ID_LEN]; __u8 type; acpi_probe_entry_validate_subtbl subtable_valid; union { acpi_tbl_table_handler probe_table; acpi_tbl_entry_handler probe_subtbl; }; kernel_ulong_t driver_data; }; #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_table = fn, \ .driver_data = data, \ } #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_subtbl = fn, \ .driver_data = data, \ } #define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table #define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); #define acpi_probe_device_table(t) \ ({ \ extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ ACPI_PROBE_TABLE_END(t); \ __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ (&ACPI_PROBE_TABLE_END(t) - \ &ACPI_PROBE_TABLE(t))); \ }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj) { return -ENXIO; } static inline int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return -ENXIO; } static inline int acpi_dev_prop_read_single(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val) { return -ENXIO; } static inline int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { return -ENXIO; } static inline int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { return -ENXIO; } static inline struct fwnode_handle * acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return NULL; } static inline struct fwnode_handle * acpi_node_get_parent(const struct fwnode_handle *fwnode) { return NULL; } static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return ERR_PTR(-ENXIO); } static inline int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint) { return -ENXIO; } #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ (void *) subtable, \ (void *) valid, \ (void *) fn, \ (void *) data } #define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #ifdef CONFIG_ACPI_TABLE_UPGRADE void acpi_table_upgrade(void); #else static inline void acpi_table_upgrade(void) { } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG) extern bool acpi_has_watchdog(void); #else static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE extern bool qdf2400_e44_present; int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { return 0; } #endif #if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); #else static inline int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_LPIT int lpit_read_residency_count_address(u64 *address); #else static inline int lpit_read_residency_count_address(u64 *address) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) { return -EINVAL; } #endif #ifdef CONFIG_ACPI extern int acpi_platform_notify(struct device *dev, enum kobject_action action); #else static inline int acpi_platform_notify(struct device *dev, enum kobject_action action) { return 0; } #endif #endif /*_LINUX_ACPI_H*/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ /** * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __VDSO_HELPERS_H #define __VDSO_HELPERS_H #ifndef __ASSEMBLY__ #include <vdso/datapage.h> static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) { u32 seq; while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) cpu_relax(); smp_rmb(); return seq; } static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, u32 start) { u32 seq; smp_rmb(); seq = READ_ONCE(vd->seq); return seq != start; } static __always_inline void vdso_write_begin(struct vdso_data *vd) { /* * WRITE_ONCE it is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader it is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); smp_wmb(); } static __always_inline void vdso_write_end(struct vdso_data *vd) { smp_wmb(); /* * WRITE_ONCE it is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader it is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); } #endif /* !__ASSEMBLY__ */ #endif /* __VDSO_HELPERS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Credentials management - see Documentation/security/credentials.rst * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H #include <linux/capability.h> #include <linux/init.h> #include <linux/key.h> #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/sched.h> #include <linux/sched/user.h> struct cred; struct inode; /* * COW Supplementary groups list */ struct group_info { atomic_t usage; int ngroups; kgid_t gid[0]; } __randomize_layout; /** * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * * This gets a reference to a set of supplementary groups. * * If the caller is accessing a task's credentials, they must hold the RCU read * lock when reading. */ static inline struct group_info *get_group_info(struct group_info *gi) { atomic_inc(&gi->usage); return gi; } /** * put_group_info - Release a reference to a group info structure * @group_info: The group info to release */ #define put_group_info(group_info) \ do { \ if (atomic_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) extern struct group_info init_groups; #ifdef CONFIG_MULTIUSER extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); extern int groups_search(const struct group_info *, kgid_t); extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern bool may_setgroups(void); extern void groups_sort(struct group_info *); #else static inline void groups_free(struct group_info *group_info) { } static inline int in_group_p(kgid_t grp) { return 1; } static inline int in_egroup_p(kgid_t grp) { return 1; } static inline int groups_search(const struct group_info *group_info, kgid_t grp) { return 1; } #endif /* * The security context of a task * * The parts of the context break down into two categories: * * (1) The objective context of a task. These parts are used when some other * task is attempting to affect this one. * * (2) The subjective context. These details are used when the task is acting * upon another object, be that a file, a task, a key or whatever. * * Note that some members of this structure belong to both categories - the * LSM security pointer for instance. * * A task has two security pointers. task->real_cred points to the objective * context that defines that task's actual details. The objective part of this * context is used whenever that task is acted upon. * * task->cred points to the subjective context that defines the details of how * that task is going to act upon another object. This may be overridden * temporarily to point to another security context, but normally points to the * same context as task->real_cred. */ struct cred { atomic_t usage; #ifdef CONFIG_DEBUG_CREDENTIALS atomic_t subscribers; /* number of processes subscribed */ void *put_addr; unsigned magic; #define CRED_MAGIC 0x43736564 #define CRED_MAGIC_DEAD 0x44656144 #endif kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ kgid_t sgid; /* saved GID of the task */ kuid_t euid; /* effective UID of the task */ kgid_t egid; /* effective GID of the task */ kuid_t fsuid; /* UID for VFS ops */ kgid_t fsgid; /* GID for VFS ops */ unsigned securebits; /* SUID-less security management */ kernel_cap_t cap_inheritable; /* caps our children can inherit */ kernel_cap_t cap_permitted; /* caps we're permitted */ kernel_cap_t cap_effective; /* caps we can actually use */ kernel_cap_t cap_bset; /* capability bounding set */ kernel_cap_t cap_ambient; /* Ambient capability set */ #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ struct key *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ /* RCU deletion */ union { int non_rcu; /* Can we skip RCU deletion? */ struct rcu_head rcu; /* RCU deletion hook */ }; } __randomize_layout; extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int change_create_files_as(struct cred *, struct inode *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); /* * check for validity of credentials */ #ifdef CONFIG_DEBUG_CREDENTIALS extern void __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); extern bool creds_are_invalid(const struct cred *cred); static inline void __validate_creds(const struct cred *cred, const char *file, unsigned line) { if (unlikely(creds_are_invalid(cred))) __invalid_creds(cred, file, line); } #define validate_creds(cred) \ do { \ __validate_creds((cred), __FILE__, __LINE__); \ } while(0) #define validate_process_creds() \ do { \ __validate_process_creds(current, __FILE__, __LINE__); \ } while(0) extern void validate_creds_for_do_exit(struct task_struct *); #else static inline void validate_creds(const struct cred *cred) { } static inline void validate_creds_for_do_exit(struct task_struct *tsk) { } static inline void validate_process_creds(void) { } #endif static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, cap_intersect(cred->cap_permitted, cred->cap_inheritable)); } /** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference * * Get a reference on the specified set of new credentials. The caller must * release the reference. */ static inline struct cred *get_new_cred(struct cred *cred) { atomic_inc(&cred->usage); return cred; } /** * get_cred - Get a reference on a set of credentials * @cred: The credentials to reference * * Get a reference on the specified set of credentials. The caller must * release the reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the * usage count. The purpose of this is to attempt to catch at compile time the * accidental alteration of a set of credentials that should be considered * immutable. */ static inline const struct cred *get_cred(const struct cred *cred) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred(nonconst_cred); } static inline const struct cred *get_cred_rcu(const struct cred *cred) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; if (!atomic_inc_not_zero(&nonconst_cred->usage)) return NULL; validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. * * This takes a const pointer to a set of credentials because the credentials * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; if (cred) { validate_creds(cred); if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } } /** * current_cred - Access the current task's subjective credentials * * Access the subjective credentials of the current task. RCU-safe, * since nobody else can modify it. */ #define current_cred() \ rcu_dereference_protected(current->cred, 1) /** * current_real_cred - Access the current task's objective credentials * * Access the objective credentials of the current task. RCU-safe, * since nobody else can modify it. */ #define current_real_cred() \ rcu_dereference_protected(current->real_cred, 1) /** * __task_cred - Access a task's objective credentials * @task: The task to query * * Access the objective credentials of a task. The caller must hold the RCU * readlock. * * The result of this function should not be passed directly to get_cred(); * rather get_task_cred() should be used instead. */ #define __task_cred(task) \ rcu_dereference((task)->real_cred) /** * get_current_cred - Get the current task's subjective credentials * * Get the subjective credentials of the current task, pinning them so that * they can't go away. Accessing the current task's credentials directly is * not permitted. */ #define get_current_cred() \ (get_cred(current_cred())) /** * get_current_user - Get the current task's user_struct * * Get the user record of the current task, pinning it so that it can't go * away. */ #define get_current_user() \ ({ \ struct user_struct *__u; \ const struct cred *__cred; \ __cred = current_cred(); \ __u = get_uid(__cred->user); \ __u; \ }) /** * get_current_groups - Get the current task's supplementary group list * * Get the supplementary group list of the current task, pinning it so that it * can't go away. */ #define get_current_groups() \ ({ \ struct group_info *__groups; \ const struct cred *__cred; \ __cred = current_cred(); \ __groups = get_group_info(__cred->group_info); \ __groups; \ }) #define task_cred_xxx(task, xxx) \ ({ \ __typeof__(((struct cred *)NULL)->xxx) ___val; \ rcu_read_lock(); \ ___val = __task_cred((task))->xxx; \ rcu_read_unlock(); \ ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) #define task_euid(task) (task_cred_xxx((task), euid)) #define current_cred_xxx(xxx) \ ({ \ current_cred()->xxx; \ }) #define current_uid() (current_cred_xxx(uid)) #define current_gid() (current_cred_xxx(gid)) #define current_euid() (current_cred_xxx(euid)) #define current_egid() (current_cred_xxx(egid)) #define current_suid() (current_cred_xxx(suid)) #define current_sgid() (current_cred_xxx(sgid)) #define current_fsuid() (current_cred_xxx(fsuid)) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS #define current_user_ns() (current_cred_xxx(user_ns)) #else static inline struct user_namespace *current_user_ns(void) { return &init_user_ns; } #endif #define current_uid_gid(_uid, _gid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_uid) = __cred->uid; \ *(_gid) = __cred->gid; \ } while(0) #define current_euid_egid(_euid, _egid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_euid) = __cred->euid; \ *(_egid) = __cred->egid; \ } while(0) #define current_fsuid_fsgid(_fsuid, _fsgid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_fsuid) = __cred->fsuid; \ *(_fsgid) = __cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BITMAP_H #define __LINUX_BITMAP_H #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/bitops.h> #include <linux/string.h> #include <linux/kernel.h> /* * bitmaps provide bit arrays that consume one or more unsigned * longs. The bitmap interface and available operations are listed * here, in bitmap.h * * Function implementations generic to all architectures are in * lib/bitmap.c. Functions implementations that are architecture * specific are in various include/asm-<arch>/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. */ /** * DOC: bitmap overview * * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * * The generated code is more efficient when nbits is known at * compile-time and at most BITS_PER_LONG. * * :: * * bitmap_zero(dst, nbits) *dst = 0UL * bitmap_fill(dst, nbits) *dst = ~0UL * bitmap_copy(dst, src, nbits) *dst = *src * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) * bitmap_complement(dst, src, nbits) *dst = ~(*src) * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region * bitmap_next_set_region(map, &start, &end, nbits) Find next set region * bitmap_for_each_clear_region(map, rs, re, start, end) * Iterate over all clear regions * bitmap_for_each_set_region(map, rs, re, start, end) * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long * boundary will be zeroed or filled as well. Consider to use * bitmap_clear() or bitmap_set() to make explicit zeroing or filling * respectively. */ /** * DOC: bitmap bitops * * Also the following operations in asm/bitops.h apply to bitmaps.:: * * set_bit(bit, addr) *addr |= bit * clear_bit(bit, addr) *addr &= ~bit * change_bit(bit, addr) *addr ^= bit * test_bit(bit, addr) Is bit set in *addr? * test_and_set_bit(bit, addr) Set bit and return old value * test_and_clear_bit(bit, addr) Clear bit and return old value * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr * find_next_zero_bit(addr, nbits, bit) * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit * find_next_and_bit(addr1, addr2, nbits, bit) * Same as find_next_bit, but in * (*addr1 & *addr2) * */ /** * DOC: declare bitmap * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used * to declare an array named 'name' of just enough unsigned longs to * contain all bit positions from 0 to 'bits' - 1. */ /* * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); extern void bitmap_free(const unsigned long *bitmap); /* * lib/bitmap.c provides these functions: */ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern bool __pure __bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void __bitmap_set(unsigned long *map, unsigned int start, int len); extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask, unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area * * The @align_mask should be one less than a power of 2; the effect is that * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ static inline unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); } extern int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); extern int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else #define bitmap_copy_le bitmap_copy #endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) /* * The static inlines below do not handle constant nbits==0 correctly, * so make such users (should any ever turn up) call the out-of-line * versions. */ #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memcpy(dst, src, len); } /* * Copy bitmap and clear tail bits in last word. */ static inline void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } /* * On 32-bit systems bitmaps are represented as u32 arrays internally, and * therefore conversion is not needed when copying data from/to arrays of u32. */ #if BITS_PER_LONG == 64 extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ bitmap_copy_clear_tail((unsigned long *) (bitmap), \ (const unsigned long *) (buf), (nbits)) #define bitmap_to_arr32(buf, bitmap, nbits) \ bitmap_copy_clear_tail((unsigned long *) (buf), \ (const unsigned long *) (bitmap), (nbits)) #endif static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); } static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } #ifdef __LITTLE_ENDIAN #define BITMAP_MEM_ALIGNMENT 8 #else #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) return !memcmp(src1, src2, nbits / 8); return __bitmap_equal(src1, src2, nbits); } /** * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third * @src1: Pointer to bitmap 1 * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1 * @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2 * @nbits: number of bits in each of these bitmaps * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ static inline bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } static inline int bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); } static inline int bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); } static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); return find_first_bit(src, nbits) == nbits; } static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); return find_first_zero_bit(src, nbits) == nbits; } static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, shift, nbits); } static inline void bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); else __bitmap_replace(dst, old, new, mask, nbits); } static inline void bitmap_next_clear_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_zero_bit(bitmap, end, *rs); *re = find_next_bit(bitmap, end, *rs + 1); } static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); } /* * Bitmap region iterators. Iterates over the bitmap between [@start, @end). * @rs and @re should be integer variables and will be set to start and end * index of the current clear or set region. */ #define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) #define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_set_region((bitmap), &(rs), &(re), (end))) /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit * integers in 32-bit environment, and 64-bit integers in 64-bit one. * * There are four combinations of endianness and length of the word in linux * ABIs: LE64, BE64, LE32 and BE32. * * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in * bitmaps and therefore don't require any special handling. * * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the * other hand is represented as an array of 32-bit words and the position of * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that * word. For example, bit #42 is located at 10th position of 2nd word. * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit * values in memory as it usually does. But for BE we need to swap hi and lo * words manually. * * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps * hi and lo words, as is expected by bitmap. */ #if __BITS_PER_LONG == 64 #define BITMAP_FROM_U64(n) (n) #else #define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ ((unsigned long) ((u64)(n) >> 32)) #endif /** * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap * * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` * to read u64 mask, we will get the wrong word. * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) { dst[0] = mask & ULONG_MAX; if (sizeof(mask) > sizeof(unsigned long)) dst[1] = mask >> 32; } /** * bitmap_get_value8 - get an 8-bit value within a memory region * @map: address to the bitmap memory region * @start: bit offset of the 8-bit value; must be a multiple of 8 * * Returns the 8-bit value located at the @start bit offset within the @src * memory region. */ static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; return (map[index] >> offset) & 0xFF; } /** * bitmap_set_value8 - set an 8-bit value within a memory region * @map: address to the bitmap memory region * @value: the 8-bit value; values wider than 8 bits may clobber bitmap * @start: bit offset of the 8-bit value; must be a multiple of 8 */ static inline void bitmap_set_value8(unsigned long *map, unsigned long value, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; map[index] &= ~(0xFFUL << offset); map[index] |= value << offset; } #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */
1 2 3 4 5 6 7 8 9 10 11 /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/processor.h> static inline int phys_addr_valid(resource_size_t addr) { #ifdef CONFIG_PHYS_ADDR_T_64BIT return !(addr >> boot_cpu_data.x86_phys_bits); #else return 1; #endif }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BSEARCH_H #define _LINUX_BSEARCH_H #include <linux/types.h> static __always_inline void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) { const char *pivot; int result; while (num > 0) { pivot = base + (num >> 1) * size; result = cmp(key, pivot); if (result == 0) return (void *)pivot; if (result > 0) { base = pivot + size; num--; } num >>= 1; } return NULL; } extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_STRINGHASH_H #define __LINUX_STRINGHASH_H #include <linux/compiler.h> /* For __pure */ #include <linux/types.h> /* For u32, u64 */ #include <linux/hash.h> /* * Routines for hashing strings of bytes to a 32-bit hash value. * * These hash functions are NOT GUARANTEED STABLE between kernel * versions, architectures, or even repeated boots of the same kernel. * (E.g. they may depend on boot-time hardware detection or be * deliberately randomized.) * * They are also not intended to be secure against collisions caused by * malicious inputs; much slower hash functions are required for that. * * They are optimized for pathname components, meaning short strings. * Even if a majority of files have longer names, the dynamic profile of * pathname components skews short due to short directory names. * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) */ /* * Version 1: one byte at a time. Example of use: * * unsigned long hash = init_name_hash; * while (*p) * hash = partial_name_hash(tolower(*p++), hash); * hash = end_name_hash(hash); * * Although this is designed for bytes, fs/hfsplus/unicode.c * abuses it to hash 16-bit values. */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash(salt) (unsigned long)(salt) /* partial hash update function. Assume roughly 4 bits per character */ static inline unsigned long partial_name_hash(unsigned long c, unsigned long prevhash) { return (prevhash + (c << 4) + (c >> 4)) * 11; } /* * Finally: cut down the number of bits to a int value (and try to avoid * losing bits). This also has the property (wanted by the dcache) * that the msbits make a good hash table index. */ static inline unsigned int end_name_hash(unsigned long hash) { return hash_long(hash, 32); } /* * Version 2: One word (32 or 64 bits) at a time. * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h> * exists, which describes major Linux platforms like x86 and ARM), then * this computes a different hash function much faster. * * If not set, this falls back to a wrapper around the preceding. */ extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int); /* * A hash_len is a u64 with the hash of a string in the low * half and the length in the high half. */ #define hashlen_hash(hashlen) ((u32)(hashlen)) #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) #define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) /* Return the "hash_len" (hash and length) of a null-terminated string */ extern u64 __pure hashlen_string(const void *salt, const char *name); #endif /* __LINUX_STRINGHASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev_domain(async_func_t func, struct device *dev, struct async_domain *domain) { return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H #include <linux/mmdebug.h> #include <linux/mmzone.h> #include <linux/stddef.h> #include <linux/linkage.h> #include <linux/topology.h> struct vm_area_struct; /* * In case of changes, please don't forget to update * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ /* Plain integer GFP bitmasks. Do not use this directly. */ #define ___GFP_DMA 0x01u #define ___GFP_HIGHMEM 0x02u #define ___GFP_DMA32 0x04u #define ___GFP_MOVABLE 0x08u #define ___GFP_RECLAIMABLE 0x10u #define ___GFP_HIGH 0x20u #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u #define ___GFP_ZERO 0x100u #define ___GFP_ATOMIC 0x200u #define ___GFP_DIRECT_RECLAIM 0x400u #define ___GFP_KSWAPD_RECLAIM 0x800u #define ___GFP_WRITE 0x1000u #define ___GFP_NOWARN 0x2000u #define ___GFP_RETRY_MAYFAIL 0x4000u #define ___GFP_NOFAIL 0x8000u #define ___GFP_NORETRY 0x10000u #define ___GFP_MEMALLOC 0x20000u #define ___GFP_COMP 0x40000u #define ___GFP_NOMEMALLOC 0x80000u #define ___GFP_HARDWALL 0x100000u #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #ifdef CONFIG_LOCKDEP #define ___GFP_NOLOCKDEP 0x800000u #else #define ___GFP_NOLOCKDEP 0 #endif /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* * Physical address zone modifiers (see linux/mmzone.h - low four bits) * * Do not put any conditional on these. If necessary modify the definitions * without the underscores and use them consistently. The definitions here may * be used in bit comparisons. */ #define __GFP_DMA ((__force gfp_t)___GFP_DMA) #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) /** * DOC: Page mobility and placement hints * * Page mobility and placement hints * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * These flags provide hints about how mobile the page is. Pages with similar * mobility are placed within the same pageblocks to minimise problems due * to external fragmentation. * * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be * moved by page migration during memory compaction or can be reclaimed. * * %__GFP_RECLAIMABLE is used for slab allocations that specify * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. * * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, * these pages will be spread between local zones to avoid all the dirty * pages being in one zone (fair zone allocation policy). * * %__GFP_HARDWALL enforces the cpuset memory allocation policy. * * %__GFP_THISNODE forces the allocation to be satisfied from the requested * node with no fallbacks or placement policy enforcements. * * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) /** * DOC: Watermark modifiers * * Watermark modifiers -- controls access to emergency reserves * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. * For example, creating an IO context to clean pages. * * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is * high priority. Users are typically interrupt handlers. This may be * used in conjunction with %__GFP_HIGH * * %__GFP_MEMALLOC allows access to all memory. This should only be used when * the caller guarantees the allocation will allow more memory to be freed * very shortly e.g. process exiting or swapping. Users either should * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). * Users of this flag have to be extremely careful to not deplete the reserve * completely and implement a throttling mechanism which controls the * consumption of the reserve based on the amount of freed memory. * Usage of a pre-allocated pool (e.g. mempool) should be always considered * before using this flag. * * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the %__GFP_MEMALLOC flag if both are set. */ #define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /** * DOC: Reclaim modifiers * * Reclaim modifiers * ~~~~~~~~~~~~~~~~~ * Please note that all the following flags are only applicable to sleepable * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). * * %__GFP_IO can start physical IO. * * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the * allocator recursing into the filesystem which might already be holding * locks. * * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. * This flag can be cleared to avoid unnecessary delays when a fallback * option is available. * * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when * the low watermark is reached and have it reclaim pages until the high * watermark is reached. A caller may wish to clear this flag when fallback * options are available and the reclaim is likely to disrupt the system. The * canonical example is THP allocation where a fallback is cheap but * reclaim/compaction may cause indirect stalls. * * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * * The default allocator behavior depends on the request size. We have a concept * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). * !costly allocations are too essential to fail so they are implicitly * non-failing by default (with some exceptions like OOM victims might fail so * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these * implicit rules * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus * it can sleep). It will avoid disruptive actions like OOM killer. The * caller must handle the failure which is quite likely to happen under * heavy memory pressure. The flag is suitable when failure can easily be * handled at small cost, such as reduced throughput * * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim * procedures that have previously failed if there is some indication * that progress has been made else where. It can wait for other * tasks to attempt high level approaches to freeing memory such as * compaction (which removes fragmentation) and page-out. * There is still a definite limit to the number of retries, but it is * a larger limit than with %__GFP_NORETRY. * Allocations with this flag may fail, but only when there is * genuinely little unused memory. While these allocations do not * directly trigger the OOM killer, their failure indicates that * the system is likely to need to use the OOM killer soon. The * caller must handle failure, but can reasonably do so by failing * a higher-level request, or completing it only in a much less * efficient manner. * If the allocation does fail, and the caller is in a position to * free some non-essential memory, doing so could benefit the system * as a whole. * * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller * cannot handle allocation failures. The allocation could block * indefinitely but will never return with failure. Testing for * failure is pointless. * New users should be evaluated carefully (and the flag should be * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless * loop around allocator. * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) #define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /** * DOC: Action modifiers * * Action modifiers * ~~~~~~~~~~~~~~~~ * * %__GFP_NOWARN suppresses allocation failure reports. * * %__GFP_COMP address compound page metadata. * * %__GFP_ZERO returns a zeroed page on success. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** * DOC: Useful GFP flag combinations * * Useful GFP flag combinations * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Useful GFP flag combinations that are commonly used. It is recommended * that subsystems start with one of these combinations and then set/clear * %__GFP_FOO flags as necessary. * * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower * watermark is applied to allow access to "atomic reserves". * The current implementation doesn't support NMI and few other strict * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. * * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. * * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is * accounted to kmemcg. * * %GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. * * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. * Please try to avoid using this flag directly and instead use * memalloc_noio_{save,restore} to mark the whole scope which cannot * perform any IO with a short explanation why. All allocation requests * will inherit GFP_NOIO implicitly. * * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. * Please try to avoid using this flag directly and instead use * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't * recurse into the FS layer with a short explanation why. All allocation * requests will inherit GFP_NOFS implicitly. * * %GFP_USER is for userspace allocations that also need to be directly * accessibly by the kernel or hardware. It is typically used by hardware * for buffers that are mapped to userspace (e.g. graphics) that hardware * still must DMA to. cpuset limits are enforced for these allocations. * * %GFP_DMA exists for historical reasons and should be avoided where possible. * The flags indicates that the caller requires that the lowest zone be * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but * it would require careful auditing as some users really require it and * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the * lowest zone as a type of emergency reserve. * * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit * address. * * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, * do not need to be directly accessible by the kernel but that cannot * move once in use. An example may be a hardware allocation that maps * data directly into userspace but has no addressing limitations. * * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not * need direct access to but can use kmap() when access is required. They * are expected to be movable via page reclaim or page migration. Typically, * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. * * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They * are compound allocations that will generally fail quickly if memory is not * available and will not wake kswapd/kcompactd on failure. The _LIGHT * version does not attempt reclaim/compaction at all and is by default used * in page fault path, while the non-light is used by khugepaged. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_DMA __GFP_DMA #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 static inline int gfp_migratetype(const gfp_t gfp_flags) { VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } #undef GFP_MOVABLE_MASK #undef GFP_MOVABLE_SHIFT static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } /** * gfpflags_normal_context - is gfp_flags a normal sleepable context? * @gfp_flags: gfp_flags to test * * Test whether @gfp_flags indicates that the allocation is from the * %current context and allowed to sleep. * * An allocation being allowed to block doesn't mean it owns the %current * context. When direct reclaim path tries to allocate memory, the * allocation context is nested inside whatever %current was doing at the * time of the original allocation. The nested allocation may be allowed * to block but modifying anything %current owns can corrupt the outer * context's expectations. * * %true result from this function indicates that the allocation context * can sleep and use anything that's associated with %current. */ static inline bool gfpflags_normal_context(const gfp_t gfp_flags) { return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == __GFP_DIRECT_RECLAIM; } #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else #define OPT_ZONE_HIGHMEM ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA #define OPT_ZONE_DMA ZONE_DMA #else #define OPT_ZONE_DMA ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA32 #define OPT_ZONE_DMA32 ZONE_DMA32 #else #define OPT_ZONE_DMA32 ZONE_NORMAL #endif /* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT * bits long and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. * But GFP_MOVABLE is not only a zone specifier but also an allocation * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1". * * bit result * ================= * 0x0 => NORMAL * 0x1 => DMA or NORMAL * 0x2 => HIGHMEM or NORMAL * 0x3 => BAD (DMA+HIGHMEM) * 0x4 => DMA32 or NORMAL * 0x5 => BAD (DMA+DMA32) * 0x6 => BAD (HIGHMEM+DMA32) * 0x7 => BAD (HIGHMEM+DMA32+DMA) * 0x8 => NORMAL (MOVABLE+0) * 0x9 => DMA or NORMAL (MOVABLE+DMA) * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) * 0xb => BAD (MOVABLE+HIGHMEM+DMA) * 0xc => DMA32 or NORMAL (MOVABLE+DMA32) * 0xd => BAD (MOVABLE+DMA32+DMA) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms. */ #if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4 /* ZONE_DEVICE is not a valid GFP zone specifier */ #define GFP_ZONES_SHIFT 2 #else #define GFP_ZONES_SHIFT ZONES_SHIFT #endif #if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG #error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \ | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \ | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \ | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\ | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\ ) /* * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per * entry starting with bit 0. Bit is set if the combination is not * allowed. */ #define GFP_ZONE_BAD ( \ 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32) \ | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ ) static inline enum zone_type gfp_zone(gfp_t flags) { enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) & ((1 << GFP_ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } /* * There is only one page-allocator function, and two main namespaces to * it. The alloc_page*() variants return 'struct page *' and as such * can allocate highmem pages, the *get*page*() variants return * virtual kernel addresses to the allocated page(s). */ static inline int gfp_zonelist(gfp_t flags) { #ifdef CONFIG_NUMA if (unlikely(flags & __GFP_THISNODE)) return ZONELIST_NOFALLBACK; #endif return ZONELIST_FALLBACK; } /* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets * optimized to &contig_page_data at compile-time. */ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) { return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); } #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE static inline int arch_make_page_accessible(struct page *page) { return 0; } #endif struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, nodemask_t *nodemask); static inline struct page * __alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) { return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); } /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). */ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); return __alloc_pages(gfp_mask, order, nid); } /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and * online. */ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_node(nid, gfp_mask, order); } #ifdef CONFIG_NUMA extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); static inline struct page * alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_current(gfp_mask, order); } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) #define __get_dma_pages(gfp_mask, order) \ __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); extern void free_unref_page(struct page *page); extern void free_unref_page_list(struct list_head *list); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); extern void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask); extern void page_frag_free(void *addr); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) void page_alloc_init(void); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what * GFP flags are used before interrupts are enabled. Once interrupts are * enabled, it is set to __GFP_BITS_MASK while the system is running. During * hibernation, it is used by PM to avoid I/O during memory allocation while * devices are suspended. */ extern gfp_t gfp_allowed_mask; /* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); #ifdef CONFIG_PM_SLEEP extern bool pm_suspended_storage(void); #else static inline bool pm_suspended_storage(void) { return false; } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_CONTIG_ALLOC /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, int nid, nodemask_t *nodemask); #endif void free_contig_range(unsigned long pfn, unsigned int nr_pages); #ifdef CONFIG_CMA /* CMA stuff */ extern void init_cma_reserved_pageblock(struct page *page); #endif #endif /* __LINUX_GFP_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 // SPDX-License-Identifier: GPL-2.0-only /* * "splice": joining two ropes together by interweaving their strands. * * This is the "extended pipe" functionality, where a pipe is used as * an arbitrary in-memory buffer. Think of a pipe as a small kernel * buffer that you can use to transfer data from one end to the other. * * The traditional unix read/write is extended with a "splice()" operation * that transfers data buffers to or from a pipe buffer. * * Named by Larry McVoy, original implementation from Linus, extended by * Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. * * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> * */ #include <linux/bvec.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/splice.h> #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/swap.h> #include <linux/writeback.h> #include <linux/export.h> #include <linux/syscalls.h> #include <linux/uio.h> #include <linux/security.h> #include <linux/gfp.h> #include <linux/socket.h> #include <linux/sched/signal.h> #include "internal.h" /* * Attempt to steal a page from a pipe buffer. This should perhaps go into * a vm helper function, it's already simplified quite a bit by the * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; struct address_space *mapping; lock_page(page); mapping = page_mapping(page); if (mapping) { WARN_ON(!PageUptodate(page)); /* * At least for ext2 with nobh option, we need to wait on * writeback completing on this page, since we'll remove it * from the pagecache. Otherwise truncate wont wait on the * page, allowing the disk blocks to be reused by someone else * before we actually wrote our data to them. fs corruption * ensues. */ wait_on_page_writeback(page); if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag * and return good. */ if (remove_mapping(mapping, page)) { buf->flags |= PIPE_BUF_FLAG_LRU; return true; } } /* * Raced with truncate or failed to remove page from current * address space, unlock and return failure. */ out_unlock: unlock_page(page); return false; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { put_page(buf->page); buf->flags &= ~PIPE_BUF_FLAG_LRU; } /* * Check whether the contents of buf is OK to access. Since the content * is a page cache page, IO may be in flight. */ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; int err; if (!PageUptodate(page)) { lock_page(page); /* * Page got truncated/unhashed. This will cause a 0-byte * splice, if this is the first page. */ if (!page->mapping) { err = -ENODATA; goto error; } /* * Uh oh, read-error from disk. */ if (!PageUptodate(page)) { err = -EIO; goto error; } /* * Page is ok afterall, we are done. */ unlock_page(page); } return 0; error: unlock_page(page); return err; } const struct pipe_buf_operations page_cache_pipe_buf_ops = { .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .try_steal = page_cache_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return false; buf->flags |= PIPE_BUF_FLAG_LRU; return generic_pipe_buf_try_steal(pipe, buf); } static const struct pipe_buf_operations user_page_pipe_buf_ops = { .release = page_cache_pipe_buf_release, .try_steal = user_page_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); if (waitqueue_active(&pipe->rd_wait)) wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } /** * splice_to_pipe - fill passed data into a pipe * @pipe: pipe to fill * @spd: data to fill * * Description: * @spd contains a map of pages and len/offset tuples, along with * the struct pipe_buf_operations associated with these pages. This * function will link that data to the pipe. * */ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { unsigned int spd_pages = spd->nr_pages; unsigned int tail = pipe->tail; unsigned int head = pipe->head; unsigned int mask = pipe->ring_size - 1; int ret = 0, page_nr = 0; if (!spd_pages) return 0; if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; goto out; } while (!pipe_full(head, tail, pipe->max_usage)) { struct pipe_buffer *buf = &pipe->bufs[head & mask]; buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; buf->len = spd->partial[page_nr].len; buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; buf->flags = 0; head++; pipe->head = head; page_nr++; ret += buf->len; if (!--spd->nr_pages) break; } if (!ret) ret = -EAGAIN; out: while (page_nr < spd_pages) spd->spd_release(spd, page_nr++); return ret; } EXPORT_SYMBOL_GPL(splice_to_pipe); ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { unsigned int head = pipe->head; unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; int ret; if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; } else if (pipe_full(head, tail, pipe->max_usage)) { ret = -EAGAIN; } else { pipe->bufs[head & mask] = *buf; pipe->head = head + 1; return buf->len; } pipe_buf_release(pipe, buf); return ret; } EXPORT_SYMBOL(add_to_pipe); /* * Check if we need to grow the arrays holding pages and partial page * descriptions. */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { unsigned int max_usage = READ_ONCE(pipe->max_usage); spd->nr_pages_max = max_usage; if (max_usage <= PIPE_DEF_BUFFERS) return 0; spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL); spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; kfree(spd->pages); kfree(spd->partial); return -ENOMEM; } void splice_shrink_spd(struct splice_pipe_desc *spd) { if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); kfree(spd->partial); } /** * generic_file_splice_read - splice data from file to a pipe * @in: file to splice from * @ppos: position in @in * @pipe: pipe to splice to * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * Will read pages from given file and fill them into a pipe. Can be * used as long as it has more or less sane ->read_iter(). * */ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct iov_iter to; struct kiocb kiocb; unsigned int i_head; int ret; iov_iter_pipe(&to, READ, pipe, len); i_head = to.head; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { *ppos = kiocb.ki_pos; file_accessed(in); } else if (ret < 0) { to.head = i_head; to.iov_offset = 0; iov_iter_advance(&to, 0); /* to free what was emitted */ /* * callers of ->splice_read() expect -EAGAIN on * "can't put anything in there", rather than -EFAULT. */ if (ret == -EFAULT) ret = -EAGAIN; } return ret; } EXPORT_SYMBOL(generic_file_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { .release = generic_pipe_buf_release, .try_steal = generic_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { .release = generic_pipe_buf_release, .get = generic_pipe_buf_get, }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). Return the number of bytes sent. */ static int pipe_to_sendpage(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct file *file = sd->u.file; loff_t pos = sd->pos; int more; if (!likely(file->f_op->sendpage)) return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; if (sd->len < sd->total_len && pipe_occupancy(pipe->head, pipe->tail) > 1) more |= MSG_SENDPAGE_NOTLAST; return file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); if (waitqueue_active(&pipe->wr_wait)) wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } /** * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: * This function loops over the pipe and calls @actor to do the * actual moving of a single struct pipe_buffer to the desired * destination. It returns when there's no more buffers left in * the pipe or if the requested number of bytes (@sd->total_len) * have been copied. It returns a positive number (one) if the * pipe needs to be filled with more data, zero if the required * number of bytes have been copied and -errno on error. * * This, together with splice_from_pipe_{begin,end,next}, may be * used to implement the functionality of __splice_from_pipe() when * locking is required around copying the pipe buffers to the * destination. */ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { unsigned int head = pipe->head; unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; int ret; while (!pipe_empty(head, tail)) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; sd->len = buf->len; if (sd->len > sd->total_len) sd->len = sd->total_len; ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; return ret; } ret = actor(pipe, buf, sd); if (ret <= 0) return ret; buf->offset += ret; buf->len -= ret; sd->num_spliced += ret; sd->len -= ret; sd->pos += ret; sd->total_len -= ret; if (!buf->len) { pipe_buf_release(pipe, buf); tail++; pipe->tail = tail; if (pipe->files) sd->need_wakeup = true; } if (!sd->total_len) return 0; } return 1; } /* We know we have a pipe buffer, but maybe it's empty? */ static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) { unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[tail & mask]; if (unlikely(!buf->len)) { pipe_buf_release(pipe, buf); pipe->tail = tail+1; return true; } return false; } /** * splice_from_pipe_next - wait for some data to splice from * @pipe: pipe to splice from * @sd: information about the splice operation * * Description: * This function will wait for some data and return a positive * value (one) if pipe buffers are available. It will return zero * or -errno if no more data needs to be spliced. */ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { /* * Check for signal early to make process killable when there are * always buffers available */ if (signal_pending(current)) return -ERESTARTSYS; repeat: while (pipe_empty(pipe->head, pipe->tail)) { if (!pipe->writers) return 0; if (sd->num_spliced) return 0; if (sd->flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; if (sd->need_wakeup) { wakeup_pipe_writers(pipe); sd->need_wakeup = false; } pipe_wait_readable(pipe); } if (eat_empty_buffer(pipe)) goto repeat; return 1; } /** * splice_from_pipe_begin - start splicing from pipe * @sd: information about the splice operation * * Description: * This function should be called before a loop containing * splice_from_pipe_next() and splice_from_pipe_feed() to * initialize the necessary fields of @sd. */ static void splice_from_pipe_begin(struct splice_desc *sd) { sd->num_spliced = 0; sd->need_wakeup = false; } /** * splice_from_pipe_end - finish splicing from pipe * @pipe: pipe to splice from * @sd: information about the splice operation * * Description: * This function will wake up pipe writers if necessary. It should * be called after a loop containing splice_from_pipe_next() and * splice_from_pipe_feed(). */ static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) { if (sd->need_wakeup) wakeup_pipe_writers(pipe); } /** * __splice_from_pipe - splice data from a pipe to given actor * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: * This function does little more than loop over the pipe and call * @actor to do the actual moving of a single struct pipe_buffer to * the desired destination. See pipe_to_file, pipe_to_sendpage, or * pipe_to_user. * */ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { int ret; splice_from_pipe_begin(sd); do { cond_resched(); ret = splice_from_pipe_next(pipe, sd); if (ret > 0) ret = splice_from_pipe_feed(pipe, sd, actor); } while (ret > 0); splice_from_pipe_end(pipe, sd); return sd->num_spliced ? sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); /** * splice_from_pipe - splice data from a pipe to a file * @pipe: pipe to splice from * @out: file to splice to * @ppos: position in @out * @len: how many bytes to splice * @flags: splice modifier flags * @actor: handler that splices the data * * Description: * See __splice_from_pipe. This function locks the pipe inode, * otherwise it's identical to __splice_from_pipe(). * */ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, splice_actor *actor) { ssize_t ret; struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); pipe_unlock(pipe); return ret; } /** * iter_file_splice_write - splice data from a pipe to a file * @pipe: pipe info * @out: file to write to * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. * This one is ->write_iter-based. * */ ssize_t iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; int nbufs = pipe->max_usage; struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); ssize_t ret; if (unlikely(!array)) return -ENOMEM; pipe_lock(pipe); splice_from_pipe_begin(&sd); while (sd.total_len) { struct iov_iter from; unsigned int head, tail, mask; size_t left; int n; ret = splice_from_pipe_next(pipe, &sd); if (ret <= 0) break; if (unlikely(nbufs < pipe->max_usage)) { kfree(array); nbufs = pipe->max_usage; array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (!array) { ret = -ENOMEM; break; } } head = pipe->head; tail = pipe->tail; mask = pipe->ring_size - 1; /* build the vector */ left = sd.total_len; for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t this_len = buf->len; if (this_len > left) this_len = left; ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; goto done; } array[n].bv_page = buf->page; array[n].bv_len = this_len; array[n].bv_offset = buf->offset; left -= this_len; } iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); ret = vfs_iter_write(out, &from, &sd.pos, 0); if (ret <= 0) break; sd.num_spliced += ret; sd.total_len -= ret; *ppos = sd.pos; /* dismiss the fully eaten buffers, adjust the partial one */ tail = pipe->tail; while (ret) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; if (ret >= buf->len) { ret -= buf->len; buf->len = 0; pipe_buf_release(pipe, buf); tail++; pipe->tail = tail; if (pipe->files) sd.need_wakeup = true; } else { buf->offset += ret; buf->len -= ret; ret = 0; } } } done: kfree(array); splice_from_pipe_end(pipe, &sd); pipe_unlock(pipe); if (sd.num_spliced) ret = sd.num_spliced; return ret; } EXPORT_SYMBOL(iter_file_splice_write); /** * generic_splice_sendpage - splice data from a pipe to a socket * @pipe: pipe to splice from * @out: socket to write to * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * Will send @len bytes from the pipe to a network socket. No data copying * is involved. * */ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); } EXPORT_SYMBOL(generic_splice_sendpage); static int warn_unsupported(struct file *file, const char *op) { pr_debug_ratelimited( "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", op, file, current->pid, current->comm); return -EINVAL; } /* * Attempt to initiate a splice from pipe to file. */ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { if (unlikely(!out->f_op->splice_write)) return warn_unsupported(out, "write"); return out->f_op->splice_write(pipe, out, ppos, len, flags); } /* * Attempt to initiate a splice from a file to a pipe. */ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { int ret; if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; ret = rw_verify_area(READ, in, ppos, len); if (unlikely(ret < 0)) return ret; if (unlikely(len > MAX_RW_COUNT)) len = MAX_RW_COUNT; if (unlikely(!in->f_op->splice_read)) return warn_unsupported(in, "read"); return in->f_op->splice_read(in, ppos, pipe, len, flags); } /** * splice_direct_to_actor - splices data directly between two non-pipes * @in: file to splice from * @sd: actor information on where to splice to * @actor: handles the data splicing * * Description: * This is a special case helper to splice directly between two * points, without requiring an explicit pipe. Internally an allocated * pipe is cached in the process, and reused during the lifetime of * that process. * */ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, splice_direct_actor *actor) { struct pipe_inode_info *pipe; long ret, bytes; umode_t i_mode; size_t len; int i, flags, more; /* * We require the input being a regular file, as we don't want to * randomly drop data for eg socket -> socket splicing. Use the * piped splicing for that! */ i_mode = file_inode(in)->i_mode; if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) return -EINVAL; /* * neither in nor out is a pipe, setup an internal pipe attached to * 'out' and transfer the wanted data from 'in' to 'out' through that */ pipe = current->splice_pipe; if (unlikely(!pipe)) { pipe = alloc_pipe_info(); if (!pipe) return -ENOMEM; /* * We don't have an immediate reader, but we'll read the stuff * out of the pipe right after the splice_to_pipe(). So set * PIPE_READERS appropriately. */ pipe->readers = 1; current->splice_pipe = pipe; } /* * Do the splice. */ ret = 0; bytes = 0; len = sd->total_len; flags = sd->flags; /* * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; more = sd->flags & SPLICE_F_MORE; WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); while (len) { unsigned int p_space; size_t read_len; loff_t pos = sd->pos, prev_pos = pos; /* Don't try to read more the pipe has space for. */ p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); read_len = min_t(size_t, len, p_space << PAGE_SHIFT); ret = do_splice_to(in, &pos, pipe, read_len, flags); if (unlikely(ret <= 0)) goto out_release; read_len = ret; sd->total_len = read_len; /* * If more data is pending, set SPLICE_F_MORE * If this is the last data and SPLICE_F_MORE was not set * initially, clears it. */ if (read_len < len) sd->flags |= SPLICE_F_MORE; else if (!more) sd->flags &= ~SPLICE_F_MORE; /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); if (unlikely(ret <= 0)) { sd->pos = prev_pos; goto out_release; } bytes += ret; len -= ret; sd->pos = pos; if (ret < read_len) { sd->pos = prev_pos + ret; goto out_release; } } done: pipe->tail = pipe->head = 0; file_accessed(in); return bytes; out_release: /* * If we did an incomplete transfer we must release * the pipe buffers in question: */ for (i = 0; i < pipe->ring_size; i++) { struct pipe_buffer *buf = &pipe->bufs[i]; if (buf->ops) pipe_buf_release(pipe, buf); } if (!bytes) bytes = ret; goto done; } EXPORT_SYMBOL(splice_direct_to_actor); static int direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { struct file *file = sd->u.file; return do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags); } /** * do_splice_direct - splices data directly between two files * @in: file to splice from * @ppos: input file offset * @out: file to splice to * @opos: output file offset * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * For use by do_sendfile(). splice can easily emulate sendfile, but * doing it in the application would incur an extra system call * (splice in + splice out, as compared to just sendfile()). So this helper * can splice directly through a process-private pipe. * */ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len, unsigned int flags) { struct splice_desc sd = { .len = len, .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, .opos = opos, }; long ret; if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; ret = rw_verify_area(WRITE, out, opos, len); if (unlikely(ret < 0)) return ret; ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) *ppos = sd.pos; return ret; } EXPORT_SYMBOL(do_splice_direct); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { for (;;) { if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); return -EPIPE; } if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; pipe_wait_writable(pipe); } } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); /* * Determine where to splice to/from. */ long do_splice(struct file *in, loff_t *off_in, struct file *out, loff_t *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset; long ret; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) return -EBADF; ipipe = get_pipe_info(in, true); opipe = get_pipe_info(out, true); if (ipipe && opipe) { if (off_in || off_out) return -ESPIPE; /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; return splice_pipe_to_pipe(ipipe, opipe, len, flags); } if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; offset = *off_out; } else { offset = out->f_pos; } if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; ret = rw_verify_area(WRITE, out, &offset, len); if (unlikely(ret < 0)) return ret; if (in->f_flags & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; file_start_write(out); ret = do_splice_from(ipipe, out, &offset, len, flags); file_end_write(out); if (!off_out) out->f_pos = offset; else *off_out = offset; return ret; } if (opipe) { if (off_out) return -ESPIPE; if (off_in) { if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; offset = *off_in; } else { offset = in->f_pos; } if (out->f_flags & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; pipe_lock(opipe); ret = wait_for_space(opipe, flags); if (!ret) { unsigned int p_space; /* Don't try to read more the pipe has space for. */ p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail); len = min_t(size_t, len, p_space << PAGE_SHIFT); ret = do_splice_to(in, &offset, opipe, len, flags); } pipe_unlock(opipe); if (ret > 0) wakeup_pipe_readers(opipe); if (!off_in) in->f_pos = offset; else *off_in = offset; return ret; } return -EINVAL; } static long __do_splice(struct file *in, loff_t __user *off_in, struct file *out, loff_t __user *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset, *__off_in = NULL, *__off_out = NULL; long ret; ipipe = get_pipe_info(in, true); opipe = get_pipe_info(out, true); if (ipipe && off_in) return -ESPIPE; if (opipe && off_out) return -ESPIPE; if (off_out) { if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; __off_out = &offset; } if (off_in) { if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; __off_in = &offset; } ret = do_splice(in, __off_in, out, __off_out, len, flags); if (ret < 0) return ret; if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) return -EFAULT; if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) return -EFAULT; return ret; } static int iter_to_pipe(struct iov_iter *from, struct pipe_inode_info *pipe, unsigned flags) { struct pipe_buffer buf = { .ops = &user_page_pipe_buf_ops, .flags = flags }; size_t total = 0; int ret = 0; bool failed = false; while (iov_iter_count(from) && !failed) { struct page *pages[16]; ssize_t copied; size_t start; int n; copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); if (copied <= 0) { ret = copied; break; } for (n = 0; copied; n++, start = 0) { int size = min_t(int, copied, PAGE_SIZE - start); if (!failed) { buf.page = pages[n]; buf.offset = start; buf.len = size; ret = add_to_pipe(pipe, &buf); if (unlikely(ret < 0)) { failed = true; } else { iov_iter_advance(from, ret); total += ret; } } else { put_page(pages[n]); } copied -= size; } } return total ? total : ret; } static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); return n == sd->len ? n : -EFAULT; } /* * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. */ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, unsigned int flags) { struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { .total_len = iov_iter_count(iter), .flags = flags, .u.data = iter }; long ret = 0; if (!pipe) return -EBADF; if (sd.total_len) { pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); } return ret; } /* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, unsigned int flags) { struct pipe_inode_info *pipe; long ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) buf_flag = PIPE_BUF_FLAG_GIFT; pipe = get_pipe_info(file, true); if (!pipe) return -EBADF; pipe_lock(pipe); ret = wait_for_space(pipe, flags); if (!ret) ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); if (ret > 0) wakeup_pipe_readers(pipe); return ret; } static int vmsplice_type(struct fd f, int *type) { if (!f.file) return -EBADF; if (f.file->f_mode & FMODE_WRITE) { *type = WRITE; } else if (f.file->f_mode & FMODE_READ) { *type = READ; } else { fdput(f); return -EBADF; } return 0; } /* * Note that vmsplice only really supports true splicing _from_ user memory * to a pipe, not the other way around. Splicing from user memory is a simple * operation that can be supported without any funky alignment restrictions * or nasty vm tricks. We simply map in the user memory and fill them into * a pipe. The reverse isn't quite as easy, though. There are two possible * solutions for that: * * - memcpy() the data internally, at which point we might as well just * do a regular read() on the buffer anyway. * - Lots of nasty vm tricks, that are neither fast nor flexible (it * has restriction limitations on both ends of the pipe). * * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; ssize_t error; struct fd f; int type; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; f = fdget(fd); error = vmsplice_type(f, &type); if (error) return error; error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); if (error < 0) goto out_fdput; if (!iov_iter_count(&iter)) error = 0; else if (iov_iter_rw(&iter) == WRITE) error = vmsplice_to_pipe(f.file, &iter, flags); else error = vmsplice_to_user(f.file, &iter, flags); kfree(iov); out_fdput: fdput(f); return error; } SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { struct fd in, out; long error; if (unlikely(!len)) return 0; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; error = -EBADF; in = fdget(fd_in); if (in.file) { out = fdget(fd_out); if (out.file) { error = __do_splice(in.file, off_in, out.file, off_out, len, flags); fdput(out); } fdput(in); } return error; } /* * Make sure there's data to read. Wait for input if we can, otherwise * return an appropriate error. */ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (!pipe_empty(pipe->head, pipe->tail)) return 0; ret = 0; pipe_lock(pipe); while (pipe_empty(pipe->head, pipe->tail)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!pipe->writers) break; if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } pipe_wait_readable(pipe); } pipe_unlock(pipe); return ret; } /* * Make sure there's writeable room. Wait for room if we can, otherwise * return an appropriate error. */ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; ret = 0; pipe_lock(pipe); while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; break; } if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } pipe_wait_writable(pipe); } pipe_unlock(pipe); return ret; } /* * Splice contents of ipipe to opipe. */ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; unsigned int i_mask, o_mask; int ret = 0; bool input_wakeup = false; retry: ret = ipipe_prep(ipipe, flags); if (ret) return ret; ret = opipe_prep(opipe, flags); if (ret) return ret; /* * Potential ABBA deadlock, work around it by ordering lock * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; i_mask = ipipe->ring_size - 1; o_head = opipe->head; o_mask = opipe->ring_size - 1; do { size_t o_len; if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } i_head = ipipe->head; o_tail = opipe->tail; if (pipe_empty(i_head, i_tail) && !ipipe->writers) break; /* * Cannot make any progress, because either the input * pipe is empty or the output pipe is full. */ if (pipe_empty(i_head, i_tail) || pipe_full(o_head, o_tail, opipe->max_usage)) { /* Already processed some buffers, break */ if (ret) break; if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } /* * We raced with another reader/writer and haven't * managed to process any buffers. A zero return * value means EOF, so retry instead. */ pipe_unlock(ipipe); pipe_unlock(opipe); goto retry; } ibuf = &ipipe->bufs[i_tail & i_mask]; obuf = &opipe->bufs[o_head & o_mask]; if (len >= ibuf->len) { /* * Simply move the whole buffer from ipipe to opipe */ *obuf = *ibuf; ibuf->ops = NULL; i_tail++; ipipe->tail = i_tail; input_wakeup = true; o_len = obuf->len; o_head++; opipe->head = o_head; } else { /* * Get a reference to this pipe buffer, * so we can copy the contents over. */ if (!pipe_buf_get(ipipe, ibuf)) { if (ret == 0) ret = -EFAULT; break; } *obuf = *ibuf; /* * Don't inherit the gift and merge flags, we need to * prevent multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; obuf->len = len; ibuf->offset += len; ibuf->len -= len; o_len = len; o_head++; opipe->head = o_head; } ret += o_len; len -= o_len; } while (len); pipe_unlock(ipipe); pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. */ if (ret > 0) wakeup_pipe_readers(opipe); if (input_wakeup) wakeup_pipe_writers(ipipe); return ret; } /* * Link contents of ipipe to opipe. */ static int link_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; unsigned int i_mask, o_mask; int ret = 0; /* * Potential ABBA deadlock, work around it by ordering lock * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; i_mask = ipipe->ring_size - 1; o_head = opipe->head; o_mask = opipe->ring_size - 1; do { if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } i_head = ipipe->head; o_tail = opipe->tail; /* * If we have iterated all input buffers or run out of * output room, break. */ if (pipe_empty(i_head, i_tail) || pipe_full(o_head, o_tail, opipe->max_usage)) break; ibuf = &ipipe->bufs[i_tail & i_mask]; obuf = &opipe->bufs[o_head & o_mask]; /* * Get a reference to this pipe buffer, * so we can copy the contents over. */ if (!pipe_buf_get(ipipe, ibuf)) { if (ret == 0) ret = -EFAULT; break; } *obuf = *ibuf; /* * Don't inherit the gift and merge flag, we need to prevent * multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; if (obuf->len > len) obuf->len = len; ret += obuf->len; len -= obuf->len; o_head++; opipe->head = o_head; i_tail++; } while (len); pipe_unlock(ipipe); pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. */ if (ret > 0) wakeup_pipe_readers(opipe); return ret; } /* * This is a tee(1) implementation that works on pipes. It doesn't copy * any data, it simply references the 'in' pages on the 'out' pipe. * The 'flags' used are the SPLICE_F_* variants, currently the only * applicable one is SPLICE_F_NONBLOCK. */ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe = get_pipe_info(in, true); struct pipe_inode_info *opipe = get_pipe_info(out, true); int ret = -EINVAL; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) return -EBADF; /* * Duplicate the contents of ipipe to opipe without actually * copying the data. */ if (ipipe && opipe && ipipe != opipe) { if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; /* * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter. */ ret = ipipe_prep(ipipe, flags); if (!ret) { ret = opipe_prep(opipe, flags); if (!ret) ret = link_pipe(ipipe, opipe, len, flags); } } return ret; } SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; if (unlikely(!len)) return 0; error = -EBADF; in = fdget(fdin); if (in.file) { out = fdget(fdout); if (out.file) { error = do_tee(in.file, out.file, len, flags); fdput(out); } fdput(in); } return error; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NVRAM_H #define _LINUX_NVRAM_H #include <linux/errno.h> #include <uapi/linux/nvram.h> #ifdef CONFIG_PPC #include <asm/machdep.h> #endif /** * struct nvram_ops - NVRAM functionality made available to drivers * @read: validate checksum (if any) then load a range of bytes from NVRAM * @write: store a range of bytes to NVRAM then update checksum (if any) * @read_byte: load a single byte from NVRAM * @write_byte: store a single byte to NVRAM * @get_size: return the fixed number of bytes in the NVRAM * * Architectures which provide an nvram ops struct need not implement all * of these methods. If the NVRAM hardware can be accessed only one byte * at a time then it may be sufficient to provide .read_byte and .write_byte. * If the NVRAM has a checksum (and it is to be checked) the .read and * .write methods can be used to implement that efficiently. * * Portable drivers may use the wrapper functions defined here. * The nvram_read() and nvram_write() functions call the .read and .write * methods when available and fall back on the .read_byte and .write_byte * methods otherwise. */ struct nvram_ops { ssize_t (*get_size)(void); unsigned char (*read_byte)(int); void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); #if defined(CONFIG_X86) || defined(CONFIG_M68K) long (*initialize)(void); long (*set_checksum)(void); #endif }; extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { #ifdef CONFIG_PPC if (ppc_md.nvram_size) return ppc_md.nvram_size(); #else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); #endif return -ENODEV; } static inline unsigned char nvram_read_byte(int addr) { #ifdef CONFIG_PPC if (ppc_md.nvram_read_val) return ppc_md.nvram_read_val(addr); #else if (arch_nvram_ops.read_byte) return arch_nvram_ops.read_byte(addr); #endif return 0xFF; } static inline void nvram_write_byte(unsigned char val, int addr) { #ifdef CONFIG_PPC if (ppc_md.nvram_write_val) ppc_md.nvram_write_val(addr, val); #else if (arch_nvram_ops.write_byte) arch_nvram_ops.write_byte(val, addr); #endif } static inline ssize_t nvram_read_bytes(char *buf, size_t count, loff_t *ppos) { ssize_t nvram_size = nvram_get_size(); loff_t i; char *p = buf; if (nvram_size < 0) return nvram_size; for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) *p = nvram_read_byte(i); *ppos = i; return p - buf; } static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) { ssize_t nvram_size = nvram_get_size(); loff_t i; char *p = buf; if (nvram_size < 0) return nvram_size; for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) nvram_write_byte(*p, i); *ppos = i; return p - buf; } static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { #ifdef CONFIG_PPC if (ppc_md.nvram_read) return ppc_md.nvram_read(buf, count, ppos); #else if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); #endif return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { #ifdef CONFIG_PPC if (ppc_md.nvram_write) return ppc_md.nvram_write(buf, count, ppos); #else if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); #endif return nvram_write_bytes(buf, count, ppos); } #endif /* _LINUX_NVRAM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KBD_KERN_H #define _KBD_KERN_H #include <linux/tty.h> #include <linux/interrupt.h> #include <linux/keyboard.h> extern struct tasklet_struct keyboard_tasklet; extern char *func_table[MAX_NR_FUNC]; extern char func_buf[]; extern char *funcbufptr; extern int funcbufsize, funcbufleft; /* * kbd->xxx contains the VC-local things (flag settings etc..) * * Note: externally visible are LED_SCR, LED_NUM, LED_CAP defined in kd.h * The code in KDGETLED / KDSETLED depends on the internal and * external order being the same. * * Note: lockstate is used as index in the array key_map. */ struct kbd_struct { unsigned char lockstate; /* 8 modifiers - the names do not have any meaning at all; they can be associated to arbitrarily chosen keys */ #define VC_SHIFTLOCK KG_SHIFT /* shift lock mode */ #define VC_ALTGRLOCK KG_ALTGR /* altgr lock mode */ #define VC_CTRLLOCK KG_CTRL /* control lock mode */ #define VC_ALTLOCK KG_ALT /* alt lock mode */ #define VC_SHIFTLLOCK KG_SHIFTL /* shiftl lock mode */ #define VC_SHIFTRLOCK KG_SHIFTR /* shiftr lock mode */ #define VC_CTRLLLOCK KG_CTRLL /* ctrll lock mode */ #define VC_CTRLRLOCK KG_CTRLR /* ctrlr lock mode */ unsigned char slockstate; /* for `sticky' Shift, Ctrl, etc. */ unsigned char ledmode:1; #define LED_SHOW_FLAGS 0 /* traditional state */ #define LED_SHOW_IOCTL 1 /* only change leds upon ioctl */ unsigned char ledflagstate:4; /* flags, not lights */ unsigned char default_ledflagstate:4; #define VC_SCROLLOCK 0 /* scroll-lock mode */ #define VC_NUMLOCK 1 /* numeric lock mode */ #define VC_CAPSLOCK 2 /* capslock mode */ #define VC_KANALOCK 3 /* kanalock mode */ unsigned char kbdmode:3; /* one 3-bit value */ #define VC_XLATE 0 /* translate keycodes using keymap */ #define VC_MEDIUMRAW 1 /* medium raw (keycode) mode */ #define VC_RAW 2 /* raw (scancode) mode */ #define VC_UNICODE 3 /* Unicode mode */ #define VC_OFF 4 /* disabled mode */ unsigned char modeflags:5; #define VC_APPLIC 0 /* application key mode */ #define VC_CKMODE 1 /* cursor key mode */ #define VC_REPEAT 2 /* keyboard repeat */ #define VC_CRLF 3 /* 0 - enter sends CR, 1 - enter sends CRLF */ #define VC_META 4 /* 0 - meta, 1 - meta=prefix with ESC */ }; extern int kbd_init(void); extern void setledstate(struct kbd_struct *kbd, unsigned int led); extern int do_poke_blanked_console; extern void (*kbd_ledfunc)(unsigned int led); extern int set_console(int nr); extern void schedule_console_callback(void); /* FIXME: review locking for vt.c callers */ static inline void set_leds(void) { tasklet_schedule(&keyboard_tasklet); } static inline int vc_kbd_mode(struct kbd_struct * kbd, int flag) { return ((kbd->modeflags >> flag) & 1); } static inline int vc_kbd_led(struct kbd_struct * kbd, int flag) { return ((kbd->ledflagstate >> flag) & 1); } static inline void set_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags |= 1 << flag; } static inline void set_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate |= 1 << flag; } static inline void clr_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags &= ~(1 << flag); } static inline void clr_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate &= ~(1 << flag); } static inline void chg_vc_kbd_lock(struct kbd_struct * kbd, int flag) { kbd->lockstate ^= 1 << flag; } static inline void chg_vc_kbd_slock(struct kbd_struct * kbd, int flag) { kbd->slockstate ^= 1 << flag; } static inline void chg_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags ^= 1 << flag; } static inline void chg_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate ^= 1 << flag; } #define U(x) ((x) ^ 0xf000) #define BRL_UC_ROW 0x2800 /* keyboard.c */ struct console; void compute_shiftstate(void); /* defkeymap.c */ extern unsigned int keymap_count; #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM workqueue #if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WORKQUEUE_H #include <linux/tracepoint.h> #include <linux/workqueue.h> struct pool_workqueue; /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu * @pwq: pointer to struct pool_workqueue * @work: pointer to struct work_struct * * This event occurs when a work is queued immediately or once a * delayed work is actually queued on a workqueue (ie: once the delay * has been reached). */ TRACE_EVENT(workqueue_queue_work, TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), TP_ARGS(req_cpu, pwq, work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) __field( void *, workqueue) __field( unsigned int, req_cpu ) __field( unsigned int, cpu ) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; __entry->workqueue = pwq->wq; __entry->req_cpu = req_cpu; __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%ps workqueue=%p req_cpu=%u cpu=%u", __entry->work, __entry->function, __entry->workqueue, __entry->req_cpu, __entry->cpu) ); /** * workqueue_activate_work - called when a work gets activated * @work: pointer to struct work_struct * * This event occurs when a queued work is put on the active queue, * which happens immediately after queueing unless @max_active limit * is reached. */ TRACE_EVENT(workqueue_activate_work, TP_PROTO(struct work_struct *work), TP_ARGS(work), TP_STRUCT__entry( __field( void *, work ) ), TP_fast_assign( __entry->work = work; ), TP_printk("work struct %p", __entry->work) ); /** * workqueue_execute_start - called immediately before the workqueue callback * @work: pointer to struct work_struct * * Allows to track workqueue execution. */ TRACE_EVENT(workqueue_execute_start, TP_PROTO(struct work_struct *work), TP_ARGS(work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); /** * workqueue_execute_end - called immediately after the workqueue callback * @work: pointer to struct work_struct * @function: pointer to worker function * * Allows to track workqueue execution. */ TRACE_EVENT(workqueue_execute_end, TP_PROTO(struct work_struct *work, work_func_t function), TP_ARGS(work, function), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = function; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); #endif /* _TRACE_WORKQUEUE_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Forwarding Information Base. * * Authors: A.N.Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #ifndef _NET_IP_FIB_H #define _NET_IP_FIB_H #include <net/flow.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/refcount.h> struct fib_config { u8 fc_dst_len; u8 fc_tos; u8 fc_protocol; u8 fc_scope; u8 fc_type; u8 fc_gw_family; /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; union { __be32 fc_gw4; struct in6_addr fc_gw6; }; int fc_oif; u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; u32 fc_nh_id; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; int fc_mp_len; u32 fc_flow; u32 fc_nlflags; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; }; struct fib_info; struct rtable; struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; struct rcu_head rcu; }; struct fnhe_hash_bucket { struct fib_nh_exception __rcu *chain; }; #define FNHE_HASH_SHIFT 11 #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 struct fib_nh_common { struct net_device *nhc_dev; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; u8 nhc_gw_family; unsigned char nhc_flags; struct lwtunnel_state *nhc_lwtstate; union { __be32 ipv4; struct in6_addr ipv6; } nhc_gw; int nhc_weight; atomic_t nhc_upper_bound; /* v4 specific, but allows fib6_nh with v4 routes */ struct rtable __rcu * __percpu *nhc_pcpu_rth_output; struct rtable __rcu *nhc_rth_input; struct fnhe_hash_bucket __rcu *nhc_exceptions; }; struct fib_nh { struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif __be32 nh_saddr; int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate #define fib_nh_scope nh_common.nhc_scope #define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw4 nh_common.nhc_gw.ipv4 #define fib_nh_gw6 nh_common.nhc_gw.ipv6 #define fib_nh_weight nh_common.nhc_weight #define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* * This structure contains data shared by many of routes. */ struct nexthop; struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; struct list_head nh_list; struct net *fib_net; int fib_treeref; refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; unsigned char fib_scope; unsigned char fib_type; __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; struct dst_metrics *fib_metrics; #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] #define fib_window fib_metrics->metrics[RTAX_WINDOW-1] #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; bool fib_nh_is_v6; bool nh_updated; struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[]; }; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule; #endif struct fib_table; struct fib_result { __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; u32 tclassid; struct fib_nh_common *nhc; struct fib_info *fi; struct fib_table *table; struct hlist_head *fa_head; }; struct fib_result_nl { __be32 fl_addr; /* To be looked up*/ u32 fl_mark; unsigned char fl_tos; unsigned char fl_scope; unsigned char tb_id_in; unsigned char tb_id; /* Results */ unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; int err; }; #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 2 #endif __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) #define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) #define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_rt_info { struct fib_info *fi; u32 tb_id; __be32 dst; int dst_len; u8 tos; u8 type; u8 offload:1, trap:1, unused:6; }; struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; u32 tb_id; }; struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; }; int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; unsigned long __data[]; }; struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; bool dump_routes; bool dump_exceptions; unsigned char protocol; unsigned char rt_type; unsigned int flags; struct net_device *dev; }; int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES #define TABLE_LOCAL_INDEX (RT_TABLE_LOCAL & (FIB_TABLE_HASHSZ - 1)) #define TABLE_MAIN_INDEX (RT_TABLE_MAIN & (FIB_TABLE_HASHSZ - 1)) static inline struct fib_table *fib_get_table(struct net *net, u32 id) { struct hlist_node *tb_hlist; struct hlist_head *ptr; ptr = id == RT_TABLE_LOCAL ? &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] : &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]; tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr)); return hlist_entry(tb_hlist, struct fib_table, tb_hlist); } static inline struct fib_table *fib_new_table(struct net *net, u32 id) { return fib_get_table(net, id); } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); if (tb) err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return false; } static inline bool fib4_rule_default(const struct fib_rule *rule) { return true; } static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib4_rules_seq_read(struct net *net) { return 0; } static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { return false; } #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) return __fib_lookup(net, flp, res, flags); rcu_read_lock(); res->tclassid = 0; tb = rcu_dereference_rtnl(net->ipv4.fib_main); if (tb) err = fib_table_lookup(tb, flp, res, flags); if (!err) goto out; tb = rcu_dereference_rtnl(net->ipv4.fib_default); if (tb) err = fib_table_lookup(tb, flp, res, flags); out: if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return net->ipv4.fib_has_custom_rules; } bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv4.fib_rules_require_fldissect) return false; skb_flow_dissect_flow_keys(skb, flkeys, flag); fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; fl4->flowi4_proto = flkeys->basic.ip_proto; return true; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) { return 0; } #endif int fib_unmerge(struct net *net); static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc, const struct net_device *dev) { if (nhc->nhc_dev == dev || l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) return true; return false; } /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); int fib_nh_init(struct net *net, struct fib_nh *fib_nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack); void fib_nh_release(struct net *net, struct fib_nh *fib_nh); int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, struct nlattr *fc_encap, u16 fc_encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib_nh_common_release(struct fib_nh_common *nhc); /* Exported by fib_trie.c */ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri); void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, const struct flowi4 *flp); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); *itag = nh->nh_tclassid << 16; } else { *itag = 0; } #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) *itag = (rtag<<16); *itag |= (rtag>>16); #endif #endif } void fib_flush(struct net *net); void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } #ifdef CONFIG_PROC_FS int __net_init fib_proc_init(struct net *net); void __net_exit fib_proc_exit(struct net *net); #else static inline int fib_proc_init(struct net *net) { return 0; } static inline void fib_proc_exit(struct net *net) { } #endif u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0 */ /* * bvec iterator * * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> */ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H #include <linux/bug.h> #include <linux/errno.h> #include <linux/limits.h> #include <linux/minmax.h> #include <linux/mm.h> #include <linux/types.h> struct page; /** * struct bio_vec - a contiguous range of physical memory addresses * @bv_page: First page associated with the address range. * @bv_len: Number of bytes in the address range. * @bv_offset: Start of the address range relative to the start of @bv_page. * * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: * * nth_page(@bv_page, n) == @bv_page + n * * This holds because page_is_mergeable() checks the above property. */ struct bio_vec { struct page *bv_page; unsigned int bv_len; unsigned int bv_offset; }; struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ unsigned int bi_size; /* residual I/O count */ unsigned int bi_idx; /* current index into bvl_vec */ unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; struct bvec_iter_all { struct bio_vec bv; int idx; unsigned done; }; /* * various member access, note that bio_data should of course not be used * on highmem page vectors */ #define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) /* multi-page (mp_bvec) helpers */ #define mp_bvec_iter_page(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_page) #define mp_bvec_iter_len(bvec, iter) \ min((iter).bi_size, \ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) #define mp_bvec_iter_offset(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) #define mp_bvec_iter_page_idx(bvec, iter) \ (mp_bvec_iter_offset((bvec), (iter)) / PAGE_SIZE) #define mp_bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ .bv_page = mp_bvec_iter_page((bvec), (iter)), \ .bv_len = mp_bvec_iter_len((bvec), (iter)), \ .bv_offset = mp_bvec_iter_offset((bvec), (iter)), \ }) /* For building single-page bvec in flight */ #define bvec_iter_offset(bvec, iter) \ (mp_bvec_iter_offset((bvec), (iter)) % PAGE_SIZE) #define bvec_iter_len(bvec, iter) \ min_t(unsigned, mp_bvec_iter_len((bvec), (iter)), \ PAGE_SIZE - bvec_iter_offset((bvec), (iter))) #define bvec_iter_page(bvec, iter) \ (mp_bvec_iter_page((bvec), (iter)) + \ mp_bvec_iter_page_idx((bvec), (iter))) #define bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ .bv_page = bvec_iter_page((bvec), (iter)), \ .bv_len = bvec_iter_len((bvec), (iter)), \ .bv_offset = bvec_iter_offset((bvec), (iter)), \ }) static inline bool bvec_iter_advance(const struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) { unsigned int idx = iter->bi_idx; if (WARN_ONCE(bytes > iter->bi_size, "Attempted to advance past end of bvec iter\n")) { iter->bi_size = 0; return false; } iter->bi_size -= bytes; bytes += iter->bi_bvec_done; while (bytes && bytes >= bv[idx].bv_len) { bytes -= bv[idx].bv_len; idx++; } iter->bi_idx = idx; iter->bi_bvec_done = bytes; return true; } static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) { iter->bi_bvec_done = 0; iter->bi_idx++; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ { \ .bi_sector = 0, \ .bi_size = UINT_MAX, \ .bi_idx = 0, \ .bi_bvec_done = 0, \ } static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) { iter_all->done = 0; iter_all->idx = 0; return &iter_all->bv; } static inline void bvec_advance(const struct bio_vec *bvec, struct bvec_iter_all *iter_all) { struct bio_vec *bv = &iter_all->bv; if (iter_all->done) { bv->bv_page++; bv->bv_offset = 0; } else { bv->bv_page = bvec->bv_page + (bvec->bv_offset >> PAGE_SHIFT); bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); iter_all->done += bv->bv_len; if (iter_all->done == bvec->bv_len) { iter_all->idx++; iter_all->done = 0; } } #endif /* __LINUX_BVEC_ITER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MSR_H #define _ASM_X86_MSR_H #include "msr-index.h" #ifndef __ASSEMBLY__ #include <asm/asm.h> #include <asm/errno.h> #include <asm/cpumask.h> #include <uapi/asm/msr.h> struct msr { union { struct { u32 l; u32 h; }; u64 q; }; }; struct msr_info { u32 msr_no; struct msr reg; struct msr *msrs; int err; }; struct msr_regs_info { u32 *regs; int err; }; struct saved_msr { bool valid; struct msr_info info; }; struct saved_msrs { unsigned int num; struct saved_msr *array; }; /* * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" * constraint has different meanings. For i386, "A" means exactly * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, * it means rax *or* rdx. */ #ifdef CONFIG_X86_64 /* Using 64-bit values saves one instruction clearing the high half of low */ #define DECLARE_ARGS(val, low, high) unsigned long low, high #define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif /* * Be very careful with includes. This header is prone to include loops. */ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> #ifdef CONFIG_TRACEPOINTS DECLARE_TRACEPOINT(read_msr); DECLARE_TRACEPOINT(write_msr); DECLARE_TRACEPOINT(rdpmc); extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); #else static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif /* * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR * accessors and should not have any tracing or other functionality piggybacking * on them - those are *purely* for accessing MSRs and nothing more. So don't even * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr\n" "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) : : "c" (msr), "a"(low), "d" (high) : "memory"); } #define native_rdmsr(msr, val1, val2) \ do { \ u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ __wrmsr((msr), (u32)((u64)(val)), \ (u32)((u64)(val) >> 32)) static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; val = __rdmsr(msr); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; } static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { DECLARE_ARGS(val, low, high); asm volatile("2: rdmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" "3: mov %[fault],%[err]\n\t" "xorl %%eax, %%eax\n\t" "xorl %%edx, %%edx\n\t" "jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), [fault] "i" (-EIO)); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } /* Can be uninlined because referenced by paravirt */ static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { __wrmsr(msr, low, high); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) { int err; asm volatile("2: wrmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" "3: mov %[fault],%[err] ; jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); /** * rdtsc() - returns the current TSC without ordering constraints * * rdtsc() returns the result of RDTSC as a 64-bit integer. The * only ordering constraint it supplies is the ordering implied by * "asm volatile": it will put the RDTSC in the place you expect. The * CPU can and will speculatively execute that RDTSC, though, so the * results can be non-monotonic if compared on different CPUs. */ static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); return EAX_EDX_VAL(val, low, high); } /** * rdtsc_ordered() - read the current TSC in program order * * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. * It is ordered like a load to a global in-memory counter. It should * be impossible to observe non-monotonic rdtsc_unordered() behavior * across multiple CPUs as long as the TSC is synced. */ static __always_inline unsigned long long rdtsc_ordered(void) { DECLARE_ARGS(val, low, high); /* * The RDTSC instruction is not ordered relative to memory * access. The Intel SDM and the AMD APM are both vague on this * point, but empirically an RDTSC instruction can be * speculatively executed before prior loads. An RDTSC * immediately after an appropriate barrier appears to be * ordered as a normal load, that is, it provides the same * ordering guarantees as reading from a global memory location * that some other imaginary CPU is updating continuously with a * time stamp. * * Thus, use the preferred barrier on the respective CPU, aiming for * RDTSCP as the default. */ asm volatile(ALTERNATIVE_2("rdtsc", "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, "rdtscp", X86_FEATURE_RDTSCP) : EAX_EDX_RET(val, low, high) /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ :: "ecx"); return EAX_EDX_VAL(val, low, high); } static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using * pointer indirection), this allows gcc to optimize better */ #define rdmsr(msr, low, high) \ do { \ u64 __val = native_read_msr((msr)); \ (void)((low) = (u32)__val); \ (void)((high) = (u32)(__val >> 32)); \ } while (0) static inline void wrmsr(unsigned int msr, u32 low, u32 high) { native_write_msr(msr, low, high); } #define rdmsrl(msr, val) \ ((val) = native_read_msr((msr))) static inline void wrmsrl(unsigned int msr, u64 val) { native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); } /* wrmsr with exception handling */ static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) { return native_write_msr_safe(msr, low, high); } /* rdmsr with exception handling */ #define rdmsr_safe(msr, low, high) \ ({ \ int __err; \ u64 __val = native_read_msr_safe((msr), &__err); \ (*low) = (u32)__val; \ (*high) = (u32)(__val >> 32); \ __err; \ }) static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) { int err; *p = native_read_msr_safe(msr, &err); return err; } #define rdpmc(counter, low, high) \ do { \ u64 _l = native_read_pmc((counter)); \ (low) = (u32)_l; \ (high) = (u32)(_l >> 32); \ } while (0) #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) #endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): */ static inline int wrmsrl_safe(u32 msr, u64 val) { return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); } #define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) #define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { rdmsr(msr_no, *l, *h); return 0; } static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { wrmsr(msr_no, l, h); return 0; } static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { rdmsrl(msr_no, *q); return 0; } static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { wrmsrl(msr_no, q); return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { return rdmsr_safe(msr_no, l, h); } static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { return rdmsrl_safe(msr_no, q); } static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { return wrmsrl_safe(msr_no, q); } static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return rdmsr_safe_regs(regs); } static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_MSR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1994 Linus Torvalds * * Pentium III FXSR, SSE support * General FPU state handling cleanups * Gareth Hughes <gareth@valinux.com>, May 2000 */ #include <asm/fpu/internal.h> #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> #include <asm/traps.h> #include <asm/irq_regs.h> #include <linux/hardirq.h> #include <linux/pkeys.h> #define CREATE_TRACE_POINTS #include <asm/trace/fpu.h> /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ union fpregs_state init_fpstate __read_mostly; /* * Track whether the kernel is using the FPU state * currently. * * This flag is used: * * - by IRQ context code to potentially use the FPU * if it's unused. * * - to debug kernel_fpu_begin()/end() correctness */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* * Track which context is using the FPU on the CPU: */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); static bool kernel_fpu_disabled(void) { return this_cpu_read(in_kernel_fpu); } static bool interrupted_kernel_fpu_idle(void) { return !kernel_fpu_disabled(); } /* * Were we in user mode (or vm86 mode) when we were * interrupted? * * Doing kernel_fpu_begin/end() is ok if we are running * in an interrupt context from user mode - we'll just * save the FPU state as required. */ static bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); return regs && user_mode(regs); } /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? * * It's always ok in process context (ie "not interrupt") * but it is sometimes ok even from an irq. */ bool irq_fpu_usable(void) { return !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle(); } EXPORT_SYMBOL(irq_fpu_usable); /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact and we can * keep registers active. * * The legacy FNSAVE instruction cleared all FPU state * unconditionally, so registers are essentially destroyed. * Modern FPU state can be kept in registers, if there are * no pending FP exceptions. */ int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { copy_xregs_to_kernel(&fpu->state.xsave); /* * AVX512 state is tracked here because its use is * known to slow the max clock speed of the core. */ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) fpu->avx512_timestamp = jiffies; return 1; } if (likely(use_fxsr())) { copy_fxregs_to_kernel(fpu); return 1; } /* * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to mark them inactive: */ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); return 0; } EXPORT_SYMBOL(copy_fpregs_to_fpstate); void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); if (!(current->flags & PF_KTHREAD) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); /* * Ignore return value -- we don't care if reg state * is clobbered. */ copy_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); /* Put sane initial values into the control registers. */ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); /* * Save the FPU state (mark it for reload if necessary): * * This only ever gets called for the current task. */ void fpu__save(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } } trace_x86_fpu_after_save(fpu); fpregs_unlock(); } /* * Legacy x87 fpstate state init: */ static inline void fpstate_init_fstate(struct fregs_state *fp) { fp->cwd = 0xffff037fu; fp->swd = 0xffff0000u; fp->twd = 0xffffffffu; fp->fos = 0xffff0000u; } void fpstate_init(union fpregs_state *state) { if (!static_cpu_has(X86_FEATURE_FPU)) { fpstate_init_soft(&state->soft); return; } memset(state, 0, fpu_kernel_xstate_size); if (static_cpu_has(X86_FEATURE_XSAVES)) fpstate_init_xstate(&state->xsave); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else fpstate_init_fstate(&state->fsave); } EXPORT_SYMBOL_GPL(fpstate_init); int fpu__copy(struct task_struct *dst, struct task_struct *src) { struct fpu *dst_fpu = &dst->thread.fpu; struct fpu *src_fpu = &src->thread.fpu; dst_fpu->last_cpu = -1; if (!static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != &current->thread.fpu); /* * Don't let 'init optimized' areas of the XSAVE area * leak into the child task: */ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * If the FPU registers are not current just memcpy() the state. * Otherwise save current FPU registers directly into the child's FPU * context, without any memory-to-memory copying. * * ( The function 'fails' in the FNSAVE case, which destroys * register contents so we have to load them back. ) */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); else if (!copy_fpregs_to_fpstate(dst_fpu)) copy_kernel_to_fpregs(&dst_fpu->state); fpregs_unlock(); set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); return 0; } /* * Activate the current task's in-memory FPU context, * if it has not been used before: */ static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); set_thread_flag(TIF_NEED_FPU_LOAD); fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); } /* * This function must be called before we read a task's fpstate. * * There's two cases where this gets called: * * - for the current task (when coredumping), in which case we have * to save the latest FPU registers into the fpstate, * * - or it's called for stopped tasks (ptrace), in which case the * registers were already saved by the context-switch code when * the task scheduled out. * * If the task has used the FPU before then save it. */ void fpu__prepare_read(struct fpu *fpu) { if (fpu == &current->thread.fpu) fpu__save(fpu); } /* * This function must be called before we write a task's fpstate. * * Invalidate any cached FPU registers. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we * didn't clear its cached status here then the cached in-registers * state pending on its former CPU could be restored, corrupting * the modifications. */ void fpu__prepare_write(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU * state in the fpstate buffer: */ WARN_ON_FPU(fpu == &current->thread.fpu); /* Invalidate any cached state: */ __fpu_invalidate_fpregs_state(fpu); } /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. * * This function can be used in cases where we know that * a state-restore is coming: either an explicit one, * or a reschedule. */ void fpu__drop(struct fpu *fpu) { preempt_disable(); if (fpu == &current->thread.fpu) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); fpregs_deactivate(fpu); } trace_x86_fpu_dropped(fpu); preempt_enable(); } /* * Clear FPU registers by setting them up from the init fpstate. * Caller must do fpregs_[un]lock() around it. */ static inline void copy_init_fpstate_to_fpregs(u64 features_mask) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, features_mask); else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); else copy_kernel_to_fregs(&init_fpstate.fsave); if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); } /* * Clear the FPU state back to init state. * * Called by sys_execve(), by the signal handler code and by various * error paths. */ static void fpu__clear(struct fpu *fpu, bool user_only) { WARN_ON_FPU(fpu != &current->thread.fpu); if (!static_cpu_has(X86_FEATURE_FPU)) { fpu__drop(fpu); fpu__initialize(fpu); return; } fpregs_lock(); if (user_only) { if (!fpregs_state_valid(fpu, smp_processor_id()) && xfeatures_mask_supervisor()) copy_kernel_to_xregs(&fpu->state.xsave, xfeatures_mask_supervisor()); copy_init_fpstate_to_fpregs(xfeatures_mask_user()); } else { copy_init_fpstate_to_fpregs(xfeatures_mask_all); } fpregs_mark_activate(); fpregs_unlock(); } void fpu__clear_user_states(struct fpu *fpu) { fpu__clear(fpu, true); } void fpu__clear_all(struct fpu *fpu) { fpu__clear(fpu, false); } /* * Load FPU context before returning to userspace. */ void switch_fpu_return(void) { if (!static_cpu_has(X86_FEATURE_FPU)) return; __fpregs_load_activate(); } EXPORT_SYMBOL_GPL(switch_fpu_return); #ifdef CONFIG_X86_DEBUG_FPU /* * If current FPU state according to its tracking (loaded FPU context on this * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is * loaded on return to userland. */ void fpregs_assert_state_consistent(void) { struct fpu *fpu = &current->thread.fpu; if (test_thread_flag(TIF_NEED_FPU_LOAD)) return; WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) { struct fpu *fpu = &current->thread.fpu; fpregs_activate(fpu); fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } EXPORT_SYMBOL_GPL(fpregs_mark_activate); /* * x87 math exception handling: */ int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; if (trap_nr == X86_TRAP_MF) { unsigned short cwd, swd; /* * (~cwd & swd) will mask out exceptions that are not set to unmasked * status. 0x3f is the exception bits in these regs, 0x200 is the * C1 reg you need in case of a stack fault, 0x040 is the stack * fault bit. We should only be taking one exception at a time, * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to * fully reproduce the context of the exception. */ if (boot_cpu_has(X86_FEATURE_FXSR)) { cwd = fpu->state.fxsave.cwd; swd = fpu->state.fxsave.swd; } else { cwd = (unsigned short)fpu->state.fsave.cwd; swd = (unsigned short)fpu->state.fsave.swd; } err = swd & ~cwd; } else { /* * The SIMD FPU exceptions are handled a little differently, as there * is only a single status/control register. Thus, to determine which * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) mxcsr = fpu->state.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } if (err & 0x001) { /* Invalid op */ /* * swd & 0x240 == 0x040: Stack Underflow * swd & 0x240 == 0x240: Stack Overflow * User must clear the SF bit (0x40) if set */ return FPE_FLTINV; } else if (err & 0x004) { /* Divide by Zero */ return FPE_FLTDIV; } else if (err & 0x008) { /* Overflow */ return FPE_FLTOVF; } else if (err & 0x012) { /* Denormal, Underflow */ return FPE_FLTUND; } else if (err & 0x020) { /* Precision */ return FPE_FLTRES; } /* * If we're using IRQ 13, or supposedly even some trap * X86_TRAP_MF implementations, it's possible * we get a spurious trap, which is not an error. */ return 0; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM signal #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SIGNAL_H #include <linux/signal.h> #include <linux/sched.h> #include <linux/tracepoint.h> #define TP_STORE_SIGINFO(__entry, info) \ do { \ if (info == SEND_SIG_NOINFO) { \ __entry->errno = 0; \ __entry->code = SI_USER; \ } else if (info == SEND_SIG_PRIV) { \ __entry->errno = 0; \ __entry->code = SI_KERNEL; \ } else { \ __entry->errno = info->si_errno; \ __entry->code = info->si_code; \ } \ } while (0) #ifndef TRACE_HEADER_MULTI_READ enum { TRACE_SIGNAL_DELIVERED, TRACE_SIGNAL_IGNORED, TRACE_SIGNAL_ALREADY_PENDING, TRACE_SIGNAL_OVERFLOW_FAIL, TRACE_SIGNAL_LOSE_INFO, }; #endif /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct * @group: shared or private * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, * 'info' is not a pointer and you can't access its field. Instead, * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV * means that si_code is SI_KERNEL. */ TRACE_EVENT(signal_generate, TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task, int group, int result), TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, group ) __field( int, result ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; __entry->group = group; __entry->result = result; ), TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, __entry->comm, __entry->pid, __entry->group, __entry->result) ); /** * signal_deliver - called when a signal is delivered * @sig: signal number * @info: pointer to struct siginfo * @ka: pointer to struct k_sigaction * * A 'sig' signal is delivered to current process with 'info' siginfo, * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or * SIG_DFL. * Note that some signals reported by signal_generate tracepoint can be * lost, ignored or modified (by debugger) before hitting this tracepoint. * This means, this can show which signals are actually delivered, but * matching generated signals and delivered signals may not be correct. */ TRACE_EVENT(signal_deliver, TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka), TP_ARGS(sig, info, ka), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __field( unsigned long, sa_handler ) __field( unsigned long, sa_flags ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); __entry->sa_handler = (unsigned long)ka->sa.sa_handler; __entry->sa_flags = ka->sa.sa_flags; ), TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpm #if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RUNTIME_POWER_H #include <linux/ktime.h> #include <linux/tracepoint.h> struct device; /* * The rpm_internal events are used for tracing some important * runtime pm internal functions. */ DECLARE_EVENT_CLASS(rpm_internal, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags), TP_STRUCT__entry( __string( name, dev_name(dev) ) __field( int, flags ) __field( int , usage_count ) __field( int , disable_depth ) __field( int , runtime_auto ) __field( int , request_pending ) __field( int , irq_safe ) __field( int , child_count ) ), TP_fast_assign( __assign_str(name, dev_name(dev)); __entry->flags = flags; __entry->usage_count = atomic_read( &dev->power.usage_count); __entry->disable_depth = dev->power.disable_depth; __entry->runtime_auto = dev->power.runtime_auto; __entry->request_pending = dev->power.request_pending; __entry->irq_safe = dev->power.irq_safe; __entry->child_count = atomic_read( &dev->power.child_count); ), TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d" " irq-%-1d child-%d", __get_str(name), __entry->flags, __entry->usage_count, __entry->disable_depth, __entry->runtime_auto, __entry->request_pending, __entry->irq_safe, __entry->child_count ) ); DEFINE_EVENT(rpm_internal, rpm_suspend, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_resume, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_idle, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_usage, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); TRACE_EVENT(rpm_return_int, TP_PROTO(struct device *dev, unsigned long ip, int ret), TP_ARGS(dev, ip, ret), TP_STRUCT__entry( __string( name, dev_name(dev)) __field( unsigned long, ip ) __field( int, ret ) ), TP_fast_assign( __assign_str(name, dev_name(dev)); __entry->ip = ip; __entry->ret = ret; ), TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name), __entry->ret) ); #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Fast and scalable bitmaps. * * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #ifndef __LINUX_SCALE_BITMAP_H #define __LINUX_SCALE_BITMAP_H #include <linux/kernel.h> #include <linux/slab.h> struct seq_file; /** * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { /** * @depth: Number of bits being used in @word/@cleared */ unsigned long depth; /** * @word: word holding free bits */ unsigned long word ____cacheline_aligned_in_smp; /** * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; /** * @swap_lock: Held while swapping word <-> cleared */ spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** * struct sbitmap - Scalable bitmap. * * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This * trades off higher memory usage for better scalability. */ struct sbitmap { /** * @depth: Number of bits used in the whole bitmap. */ unsigned int depth; /** * @shift: log2(number of bits used per word) */ unsigned int shift; /** * @map_nr: Number of words (cachelines) being used for the bitmap. */ unsigned int map_nr; /** * @map: Allocated bitmap. */ struct sbitmap_word *map; }; #define SBQ_WAIT_QUEUES 8 #define SBQ_WAKE_BATCH 8 /** * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. */ struct sbq_wait_state { /** * @wait_cnt: Number of frees remaining before we wake up. */ atomic_t wait_cnt; /** * @wait: Wait queue. */ wait_queue_head_t wait; } ____cacheline_aligned_in_smp; /** * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free * bits. * * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to * avoid contention on the wait queue spinlock. This ensures that we don't hit a * scalability wall when we run out of free bits and have to start putting tasks * to sleep. */ struct sbitmap_queue { /** * @sb: Scalable bitmap. */ struct sbitmap sb; /* * @alloc_hint: Cache of last successfully allocated or freed bit. * * This is per-cpu, which allows multiple users to stick to different * cachelines until the map is exhausted. */ unsigned int __percpu *alloc_hint; /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. */ unsigned int wake_batch; /** * @wake_index: Next wait queue in @ws to wake up. */ atomic_t wake_index; /** * @ws: Wait queues. */ struct sbq_wait_state *ws; /* * @ws_active: count of currently active ws waitqueues */ atomic_t ws_active; /** * @round_robin: Allocate bits in strict round-robin order. */ bool round_robin; /** * @min_shallow_depth: The minimum shallow depth which may be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). */ unsigned int min_shallow_depth; }; /** * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. * @sb: Bitmap to initialize. * @depth: Number of bits to allocate. * @shift: Use 2^@shift bits per word in the bitmap; if a negative number if * given, a good default is chosen. * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node); /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. */ static inline void sbitmap_free(struct sbitmap *sb) { kfree(sb->map); sb->map = NULL; } /** * sbitmap_resize() - Resize a &struct sbitmap. * @sb: Bitmap to resize. * @depth: New number of bits to resize to. * * Doesn't reallocate anything. It's up to the caller to ensure that the new * depth doesn't exceed the depth that the sb was initialized with. */ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. * @alloc_hint: Hint for where to start searching for a free bit. * @round_robin: If true, be stricter about allocation order; always allocate * starting from the last allocated bit. This is less efficient * than the default behavior (false). * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. * @alloc_hint: Hint for where to start searching for a free bit. * @shallow_depth: The maximum number of bits to allocate from a single word. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * * Return: true if any bit in the bitmap is set, false otherwise. */ bool sbitmap_any_bit_set(const struct sbitmap *sb); #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. * * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ static inline void __sbitmap_for_each_set(struct sbitmap *sb, unsigned int start, sb_for_each_fn fn, void *data) { unsigned int index; unsigned int nr; unsigned int scanned = 0; if (start >= sb->depth) start = 0; index = SB_NR_TO_INDEX(sb, start); nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; word = sb->map[index].word & ~sb->map[index].cleared; if (!word) goto next; /* * On the first iteration of the outer loop, we need to add the * bit offset back to the size of the word for find_next_bit(). * On all other iterations, nr is zero, so this is a noop. */ depth += nr; while (1) { nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } next: nr = 0; if (++index >= sb->map_nr) index = 0; } } /** * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. */ static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, void *data) { __sbitmap_for_each_set(sb, 0, fn, data); } static inline unsigned long *__sbitmap_word(struct sbitmap *sb, unsigned int bitnr) { return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; } /* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) { set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) { clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /* * This one is special, since it doesn't actually clear the bit, rather it * sets the corresponding bit in the ->cleared mask instead. Paired with * the caller doing sbitmap_deferred_clear() if a given index is full, which * will clear the previously freed entries in the corresponding ->word. */ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) { unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct * seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The output isn't guaranteed to be internally * consistent. */ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. * * @sbq: Bitmap queue to free. */ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); } /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. * @depth: New number of bits to resize to. * * Like sbitmap_resize(), this doesn't reallocate anything. It has to do * some extra work on the &struct sbitmap_queue, so it's not safe to just * resize the underlying &struct sbitmap. */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); /** * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } /** * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int *cpu, unsigned int shallow_depth) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); put_cpu(); return nr; } /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. * @sbq: Bitmap queue in question. * @min_shallow_depth: The minimum shallow depth that will be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). * * sbitmap_queue_clear() batches wakeups as an optimization. The batch size * depends on the depth of the bitmap. Since the shallow allocation functions * effectively operate with a different depth, the shallow depth must be taken * into account when calculating the batch size. This function must be called * with the minimum shallow depth that will be used. Failure to do so can result * in missed wakeups. */ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth); /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. * @cpu: CPU the bit was allocated on. */ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); } static inline void sbq_index_atomic_inc(atomic_t *index) { int old = atomic_read(index); int new = sbq_index_inc(old); atomic_cmpxchg(index, old, new); } /** * sbq_wait_ptr() - Get the next wait queue to use for a &struct * sbitmap_queue. * @sbq: Bitmap queue to wait on. * @wait_index: A counter per "user" of @sbq. */ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, atomic_t *wait_index) { struct sbq_wait_state *ws; ws = &sbq->ws[atomic_read(wait_index)]; sbq_index_atomic_inc(wait_index); return ws; } /** * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct * sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); /** * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct * seq_file. * @sbq: Bitmap queue to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); struct sbq_wait { struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ struct wait_queue_entry wait; }; #define DEFINE_SBQ_WAIT(name) \ struct sbq_wait name = { \ .sbq = NULL, \ .wait = { \ .private = current, \ .func = autoremove_wake_function, \ .entry = LIST_HEAD_INIT((name).wait.entry), \ } \ } /* * Wrapper around prepare_to_wait_exclusive(), which maintains some extra * internal state. */ void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state); /* * Must be paired with sbitmap_prepare_to_wait(). */ void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Wrapper around add_wait_queue(), which maintains some extra internal state */ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Must be paired with sbitmap_add_wait_queue() */ void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); #endif /* __LINUX_SCALE_BITMAP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UNWIND_H #define _ASM_X86_UNWIND_H #include <linux/sched.h> #include <linux/ftrace.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) #define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; struct task_struct *task; int graph_idx; bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; /* * If non-NULL: The current frame is incomplete and doesn't contain a * valid BP. When looking for the next frame, use this instead of the * non-existent saved BP. */ unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; #endif }; void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } static inline bool unwind_error(struct unwind_state *state) { return state->error; } static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { first_frame = first_frame ? : get_stack_pointer(task, regs); __unwind_start(state, task, regs, first_frame); } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* * If 'partial' returns true, only the iret frame registers are valid. */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { if (unwind_done(state)) return NULL; if (partial) { #ifdef CONFIG_UNWINDER_ORC *partial = !state->full_regs; #else *partial = false; #endif } return state->regs; } #else static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { return NULL; } #endif #ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); #else static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name> * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com> */ #ifndef _LINUX_BITFIELD_H #define _LINUX_BITFIELD_H #include <linux/build_bug.h> #include <asm/byteorder.h> /* * Bitfield access macros * * FIELD_{GET,PREP} macros take as first parameter shifted mask * from which they extract the base mask and shift amount. * Mask must be a compilation time constant. * * Example: * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) * #define REG_FIELD_D GENMASK(31, 16) * * Get: * a = FIELD_GET(REG_FIELD_A, reg); * b = FIELD_GET(REG_FIELD_B, reg); * * Set: * reg = FIELD_PREP(REG_FIELD_A, 1) | * FIELD_PREP(REG_FIELD_B, 0) | * FIELD_PREP(REG_FIELD_C, c) | * FIELD_PREP(REG_FIELD_D, 0x40); * * Modify: * reg &= ~REG_FIELD_C; * reg |= FIELD_PREP(REG_FIELD_C, c); */ #define __bf_shf(x) (__builtin_ffsll(x) - 1) #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ }) /** * FIELD_MAX() - produce the maximum value representable by a field * @_mask: shifted mask defining the field's length and position * * FIELD_MAX() returns the maximum value that can be held in the field * specified by @_mask. */ #define FIELD_MAX(_mask) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ }) /** * FIELD_FIT() - check if value fits in the field * @_mask: shifted mask defining the field's length and position * @_val: value to test against the field * * Return: true if @_val can fit inside @_mask, false if @_val is too big. */ #define FIELD_FIT(_mask, _val) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) /** * FIELD_PREP() - prepare a bitfield element * @_mask: shifted mask defining the field's length and position * @_val: value to put in the field * * FIELD_PREP() masks and shifts up the value. The result should * be combined with other fields of the bitfield using logical OR. */ #define FIELD_PREP(_mask, _val) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ }) /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position * @_reg: value of entire bitfield * * FIELD_GET() extracts the field specified by @_mask from the * bitfield passed in as @_reg by masking and shifting it down. */ #define FIELD_GET(_mask, _reg) \ ({ \ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") __bad_mask(void); static __always_inline u64 field_multiplier(u64 field) { if ((field | (field - 1)) & ((field | (field - 1)) + 1)) __bad_mask(); return field & -field; } static __always_inline u64 field_mask(u64 field) { return field / field_multiplier(field); } #define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ static __always_inline __##type type##_replace_bits(__##type old, \ base val, base field) \ { \ return (old & ~to(field)) | type##_encode_bits(val, field); \ } \ static __always_inline void type##p_replace_bits(__##type *p, \ base val, base field) \ { \ *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ } \ static __always_inline base type##_get_bits(__##type v, base field) \ { \ return (from(v) & field)/field_multiplier(field); \ } #define __MAKE_OP(size) \ ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ ____MAKE_OP(u##size,u##size,,) ____MAKE_OP(u8,u8,,) __MAKE_OP(16) __MAKE_OP(32) __MAKE_OP(64) #undef __MAKE_OP #undef ____MAKE_OP #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_RT_H #define _LINUX_SCHED_RT_H #include <linux/sched.h> struct task_struct; static inline int rt_prio(int prio) { if (unlikely(prio < MAX_RT_PRIO)) return 1; return 0; } static inline int rt_task(struct task_struct *p) { return rt_prio(p->prio); } static inline bool task_is_realtime(struct task_struct *tsk) { int policy = tsk->policy; if (policy == SCHED_FIFO || policy == SCHED_RR) return true; if (policy == SCHED_DEADLINE) return true; return false; } #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. */ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) { return p->pi_top_task; } extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; } # define rt_mutex_adjust_pi(p) do { } while (0) static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return false; } #endif extern void normalize_rt_tasks(void); /* * default timeslice is 100 msecs (used only for SCHED_RR tasks). * Timeslices get refilled after they expire. */ #define RR_TIMESLICE (100 * HZ / 1000) #endif /* _LINUX_SCHED_RT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_DCACHE_H #define __LINUX_DCACHE_H #include <linux/atomic.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/rculist_bl.h> #include <linux/spinlock.h> #include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> #include <linux/wait.h> struct path; struct vfsmount; /* * linux/include/linux/dcache.h * * Dirent cache data structures * * (C) Copyright 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ #define IS_ROOT(x) ((x) == (x)->d_parent) /* The hash is always the low bits of hash_len */ #ifdef __LITTLE_ENDIAN #define HASH_LEN_DECLARE u32 hash; u32 len #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) #else #define HASH_LEN_DECLARE u32 len; u32 hash #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) #endif /* * "quick string" -- eases parameter passing, but more importantly * saves "metadata" about the string (ie length and the hash). * * hash comes first so it snuggles against d_parent in the * dentry. */ struct qstr { union { struct { HASH_LEN_DECLARE; }; u64 hash_len; }; const unsigned char *name; }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } extern const struct qstr empty_name; extern const struct qstr slash_name; struct dentry_stat_t { long nr_dentry; long nr_unused; long age_limit; /* age in seconds */ long want_pages; /* pages requested by system */ long nr_negative; /* # of unused negative dentries */ long dummy; /* Reserved for future use */ }; extern struct dentry_stat_t dentry_stat; /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the * large memory footprint increase). */ #ifdef CONFIG_64BIT # define DNAME_INLINE_LEN 32 /* 192 bytes */ #else # ifdef CONFIG_SMP # define DNAME_INLINE_LEN 36 /* 128 bytes */ # else # define DNAME_INLINE_LEN 40 /* 128 bytes */ # endif #endif #define d_lock d_lockref.lock struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ union { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ /* * d_alias and d_rcu can share memory */ union { struct hlist_node d_alias; /* inode alias list */ struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; } __randomize_layout; /* * dentry->d_lock spinlock nesting subclasses: * * 0: normal * 1: nested */ enum dentry_d_lock_class { DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ DENTRY_D_LOCK_NESTED }; struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *); } ____cacheline_aligned; /* * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/locking.rst. Keep it updated! * * FUrther descriptions are found in Documentation/filesystems/vfs.rst. * Keep it updated too! */ /* d_flags entries */ #define DCACHE_OP_HASH 0x00000001 #define DCACHE_OP_COMPARE 0x00000002 #define DCACHE_OP_REVALIDATE 0x00000004 #define DCACHE_OP_DELETE 0x00000008 #define DCACHE_OP_PRUNE 0x00000010 #define DCACHE_DISCONNECTED 0x00000020 /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first * endeavour to clear the bit either by discovering that it is connected, * or by performing lookup operations. Any filesystem which supports * nfsd_operations MUST have a lookup function which, if it finds a * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ #define DCACHE_DONTCACHE 0x00000080 /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT 0x00000100 #define DCACHE_GENOCIDE 0x00000200 #define DCACHE_SHRINK_LIST 0x00000400 #define DCACHE_OP_WEAK_REVALIDATE 0x00000800 #define DCACHE_NFSFS_RENAMED 0x00001000 /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ #define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */ #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000 /* Parent inode is watched by some fsnotify listener */ #define DCACHE_DENTRY_KILLED 0x00008000 #define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) #define DCACHE_LRU_LIST 0x00080000 #define DCACHE_ENTRY_TYPE 0x00700000 #define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry (maybe fallthru to nowhere) */ #define DCACHE_WHITEOUT_TYPE 0x00100000 /* Whiteout dentry (stop pathwalk) */ #define DCACHE_DIRECTORY_TYPE 0x00200000 /* Normal directory */ #define DCACHE_AUTODIR_TYPE 0x00300000 /* Lookupless directory (presumed automount) */ #define DCACHE_REGULAR_TYPE 0x00400000 /* Regular file type (or fallthru to such) */ #define DCACHE_SPECIAL_TYPE 0x00500000 /* Other file type (or fallthru to such) */ #define DCACHE_SYMLINK_TYPE 0x00600000 /* Symlink (or fallthru to such) */ #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ #define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ #define DCACHE_OP_REAL 0x04000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR 0x20000000 #define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */ extern seqlock_t rename_lock; /* * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); /* <clickety>-<click> the ramfs-type tree */ extern void d_genocide(struct dentry *); extern void d_tmpfile(struct dentry *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int path_has_submounts(const struct path *); /* * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); extern void d_add(struct dentry *, struct inode *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_lockref.count; } /* * helper function for dentry_operations.d_dname() members */ extern __printf(4, 5) char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *dentry_path_raw(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be * destroyed when it has references. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { if (dentry) dentry->d_lockref.count++; return dentry; } static inline struct dentry *dget(struct dentry *dentry) { if (dentry) lockref_get(&dentry->d_lockref); return dentry; } extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed * @dentry: entry to check * * Returns true if the dentry passed is not currently hashed. */ static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(const struct dentry *dentry) { return d_unhashed(dentry) && !IS_ROOT(dentry); } static inline int cant_mount(const struct dentry *dentry) { return (dentry->d_flags & DCACHE_CANT_MOUNT); } static inline void dont_mount(struct dentry *dentry) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_CANT_MOUNT; spin_unlock(&dentry->d_lock); } extern void __d_lookup_done(struct dentry *); static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } static inline void d_lookup_done(struct dentry *dentry) { if (unlikely(d_in_lookup(dentry))) { spin_lock(&dentry->d_lock); __d_lookup_done(dentry); spin_unlock(&dentry->d_lock); } } extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MANAGED_DENTRY; } static inline bool d_mountpoint(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MOUNTED; } /* * Directory cache entry type accessor functions. */ static inline unsigned __d_entry_type(const struct dentry *dentry) { return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_MISS_TYPE; } static inline bool d_is_whiteout(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_WHITEOUT_TYPE; } static inline bool d_can_lookup(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE; } static inline bool d_is_autodir(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE; } static inline bool d_is_dir(const struct dentry *dentry) { return d_can_lookup(dentry) || d_is_autodir(dentry); } static inline bool d_is_symlink(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE; } static inline bool d_is_reg(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_REGULAR_TYPE; } static inline bool d_is_special(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SPECIAL_TYPE; } static inline bool d_is_file(const struct dentry *dentry) { return d_is_reg(dentry) || d_is_special(dentry); } static inline bool d_is_negative(const struct dentry *dentry) { // TODO: check d_is_whiteout(dentry) also. return d_is_miss(dentry); } static inline bool d_flags_negative(unsigned flags) { return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; } static inline bool d_is_positive(const struct dentry *dentry) { return !d_is_negative(dentry); } /** * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents either an absent name or a name that * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent * a true miss, a whiteout that isn't represented by a 0,0 chardev or a * fallthrough marker in an opaque directory. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. (3) The dentry may have something attached to ->d_lower and the * type field of the flags may be set to something other than miss or whiteout. */ static inline bool d_really_is_negative(const struct dentry *dentry) { return dentry->d_inode == NULL; } /** * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents a name that maps to an inode * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if * that is represented on medium as a 0,0 chardev. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. */ static inline bool d_really_is_positive(const struct dentry *dentry) { return dentry->d_inode != NULL; } static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) { return dentry->d_flags & DCACHE_FALLTHRU; } extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val) { return mult_frac(val, sysctl_vfs_cache_pressure, 100); } /** * d_inode - Get the actual inode of this dentry * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode(const struct dentry *dentry) { return dentry->d_inode; } /** * d_inode_rcu - Get the actual inode of this dentry with READ_ONCE() * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode_rcu(const struct dentry *dentry) { return READ_ONCE(dentry->d_inode); } /** * d_backing_inode - Get upper or lower inode we should be using * @upper: The upper layer * * This is the helper that should be used to get at the inode that will be used * if this dentry were to be opened as a file. The inode may be on the upper * dentry or it may be on a lower dentry pinned by the upper. * * Normal filesystems should not use this to access their own inodes. */ static inline struct inode *d_backing_inode(const struct dentry *upper) { struct inode *inode = upper->d_inode; return inode; } /** * d_backing_dentry - Get upper or lower dentry we should be using * @upper: The upper layer * * This is the helper that should be used to get the dentry of the inode that * will be used if this dentry were opened as a file. It may be the upper * dentry or it may be a lower dentry pinned by the upper. * * Normal filesystems should not use this to access their own dentries. */ static inline struct dentry *d_backing_dentry(struct dentry *upper) { return upper; } /** * d_real - Return the real dentry * @dentry: the dentry to query * @inode: inode to select the dentry from multiple layers (can be NULL) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ static inline struct dentry *d_real(struct dentry *dentry, const struct inode *inode) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) return dentry->d_op->d_real(dentry, inode); else return dentry; } /** * d_real_inode - Return the real inode * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ return d_backing_inode(d_real((struct dentry *) dentry, NULL)); } struct name_snapshot { struct qstr name; unsigned char inline_name[DNAME_INLINE_LEN]; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); #endif /* __LINUX_DCACHE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NSPROXY_H #define _LINUX_NSPROXY_H #include <linux/spinlock.h> #include <linux/sched.h> struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; struct cgroup_namespace; struct fs_struct; /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * * The pid namespace is an exception -- it's accessed using * task_active_pid_ns. The pid namespace here is the * namespace that children will use. * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. * * The nsproxy is shared by tasks which share all namespaces. * As soon as a single namespace is cloned or unshared, the * nsproxy is copied. */ struct nsproxy { atomic_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; struct time_namespace *time_ns; struct time_namespace *time_ns_for_children; struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. * * If a new user namespace is requested cred will * point to a modifiable set of credentials. If a pointer * to a modifiable set is needed nsset_cred() must be * used and tested. */ struct nsset { unsigned flags; struct nsproxy *nsproxy; struct fs_struct *fs; const struct cred *cred; }; static inline struct cred *nsset_cred(struct nsset *set) { if (set->flags & CLONE_NEWUSER) return (struct cred *)set->cred; return NULL; } /* * the namespaces access rules are: * * 1. only current task is allowed to change tsk->nsproxy pointer or * any pointer on the nsproxy itself. Current must hold the task_lock * when changing tsk->nsproxy. * * 2. when accessing (i.e. reading) current task's namespaces - no * precautions should be taken - just dereference the pointers * * 3. the access to other task namespaces is performed like this * task_lock(task); * nsproxy = task->nsproxy; * if (nsproxy != NULL) { * / * * * work with the namespaces here * * e.g. get the reference on one of them * * / * } / * * * NULL task->nsproxy means that this task is * * almost dead (zombie) * * / * task_unlock(task); * */ int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { if (atomic_dec_and_test(&ns->count)) { free_nsproxy(ns); } } static inline void get_nsproxy(struct nsproxy *ns) { atomic_inc(&ns->count); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl.h (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #ifndef __LINUX_POSIX_ACL_H #define __LINUX_POSIX_ACL_H #include <linux/bug.h> #include <linux/slab.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <uapi/linux/posix_acl.h> struct posix_acl_entry { short e_tag; unsigned short e_perm; union { kuid_t e_uid; kgid_t e_gid; }; }; struct posix_acl { refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; struct posix_acl_entry a_entries[]; }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ for(pa=(acl)->a_entries, pe=pa+(acl)->a_count; pa<pe; pa++) /* * Duplicate an ACL handle. */ static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) { if (acl) refcount_inc(&acl->a_refcount); return acl; } /* * Free an ACL handle. */ static inline void posix_acl_release(struct posix_acl *acl) { if (acl && refcount_dec_and_test(&acl->a_refcount)) kfree_rcu(acl, a_rcu); } /* posix_acl.c */ extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(struct user_namespace *, const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); #ifdef CONFIG_FS_POSIX_ACL extern int posix_acl_chmod(struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **); extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode); static inline void cache_no_acl(struct inode *inode) { inode->i_acl = NULL; inode->i_default_acl = NULL; } #else static inline int posix_acl_chmod(struct inode *inode, umode_t mode) { return 0; } #define simple_set_acl NULL static inline int simple_acl_create(struct inode *dir, struct inode *inode) { return 0; } static inline void cache_no_acl(struct inode *inode) { } static inline int posix_acl_create(struct inode *inode, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { *default_acl = *acl = NULL; return 0; } static inline void forget_all_cached_acls(struct inode *inode) { } #endif /* CONFIG_FS_POSIX_ACL */ struct posix_acl *get_acl(struct inode *inode, int type); #endif /* __LINUX_POSIX_ACL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/relay.h * * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) * * CONFIG_RELAY definitions and declarations */ #ifndef _LINUX_RELAY_H #define _LINUX_RELAY_H #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/irq_work.h> #include <linux/bug.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/kref.h> #include <linux/percpu.h> /* * Tracks changes to rchan/rchan_buf structs */ #define RELAYFS_CHANNEL_VERSION 7 /* * Per-cpu relay channel buffer */ struct rchan_buf { void *start; /* start of channel buffer */ void *data; /* start of current sub-buffer */ size_t offset; /* current offset into sub-buffer */ size_t subbufs_produced; /* count of sub-buffers produced */ size_t subbufs_consumed; /* count of sub-buffers consumed */ struct rchan *chan; /* associated channel */ wait_queue_head_t read_wait; /* reader wait queue */ struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ } ____cacheline_aligned; /* * Relay channel data structure */ struct rchan { u32 version; /* the version of this struct */ size_t subbuf_size; /* sub-buffer size */ size_t n_subbufs; /* number of sub-buffers per buffer */ size_t alloc_size; /* total buffer size allocated */ struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ struct dentry *parent; /* parent dentry passed to open */ int has_base_filename; /* has a filename associated? */ char base_filename[NAME_MAX]; /* saved base filename */ }; /* * Relay channel client callbacks */ struct rchan_callbacks { /* * subbuf_start - called on buffer-switch to a new sub-buffer * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. * * NOTE: subbuf_start will also be invoked when the buffer is * created, so that the first sub-buffer can be initialized * if necessary. In this case, prev_subbuf will be NULL. * * NOTE: the client can reserve bytes at the beginning of the new * sub-buffer by calling subbuf_start_reserve() in this callback. */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding); /* * buf_mapped - relay buffer mmap notification * @buf: the channel buffer * @filp: relay file pointer * * Called when a relay file is successfully mmapped */ void (*buf_mapped)(struct rchan_buf *buf, struct file *filp); /* * buf_unmapped - relay buffer unmap notification * @buf: the channel buffer * @filp: relay file pointer * * Called when a relay file is successfully unmapped */ void (*buf_unmapped)(struct rchan_buf *buf, struct file *filp); /* * create_buf_file - create file to represent a relay channel buffer * @filename: the name of the file to create * @parent: the parent of the file to create * @mode: the mode of the file to create * @buf: the channel buffer * @is_global: outparam - set non-zero if the buffer should be global * * Called during relay_open(), once for each per-cpu buffer, * to allow the client to create a file to be used to * represent the corresponding channel buffer. If the file is * created outside of relay, the parent must also exist in * that filesystem. * * The callback should return the dentry of the file created * to represent the relay buffer. * * Setting the is_global outparam to a non-zero value will * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * * See Documentation/filesystems/relay.rst for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, umode_t mode, struct rchan_buf *buf, int *is_global); /* * remove_buf_file - remove file representing a relay channel buffer * @dentry: the dentry of the file to remove * * Called during relay_close(), once for each per-cpu buffer, * to allow the client to remove a file used to represent a * channel buffer. * * The callback should return 0 if successful, negative if not. */ int (*remove_buf_file)(struct dentry *dentry); }; /* * CONFIG_RELAY kernel API, kernel/relay.c */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, struct rchan_callbacks *cb, void *private_data); extern int relay_late_setup_files(struct rchan *chan, const char *base_filename, struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); extern void relay_reset(struct rchan *chan); extern int relay_buf_full(struct rchan_buf *buf); extern size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length); /** * relay_write - write data into the channel * @chan: relay channel * @data: data to be written * @length: number of bytes to write * * Writes data into the current cpu's channel buffer. * * Protects the buffer by disabling interrupts. Use this * if you might be logging from interrupt context. Try * __relay_write() if you know you won't be logging from * interrupt context. */ static inline void relay_write(struct rchan *chan, const void *data, size_t length) { unsigned long flags; struct rchan_buf *buf; local_irq_save(flags); buf = *this_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; local_irq_restore(flags); } /** * __relay_write - write data into the channel * @chan: relay channel * @data: data to be written * @length: number of bytes to write * * Writes data into the current cpu's channel buffer. * * Protects the buffer by disabling preemption. Use * relay_write() if you might be logging from interrupt * context. */ static inline void __relay_write(struct rchan *chan, const void *data, size_t length) { struct rchan_buf *buf; buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; put_cpu_ptr(chan->buf); } /** * relay_reserve - reserve slot in channel buffer * @chan: relay channel * @length: number of bytes to reserve * * Returns pointer to reserved slot, NULL if full. * * Reserves a slot in the current cpu's channel buffer. * Does not protect the buffer at all - caller must provide * appropriate synchronization. */ static inline void *relay_reserve(struct rchan *chan, size_t length) { void *reserved = NULL; struct rchan_buf *buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { length = relay_switch_subbuf(buf, length); if (!length) goto end; } reserved = buf->data + buf->offset; buf->offset += length; end: put_cpu_ptr(chan->buf); return reserved; } /** * subbuf_start_reserve - reserve bytes at the start of a sub-buffer * @buf: relay channel buffer * @length: number of bytes to reserve * * Helper function used to reserve bytes at the beginning of * a sub-buffer in the subbuf_start() callback. */ static inline void subbuf_start_reserve(struct rchan_buf *buf, size_t length) { BUG_ON(length >= buf->chan->subbuf_size - 1); buf->offset = length; } /* * exported relay file operations, kernel/relay.c */ extern const struct file_operations relay_file_operations; #ifdef CONFIG_RELAY int relay_prepare_cpu(unsigned int cpu); #else #define relay_prepare_cpu NULL #endif #endif /* _LINUX_RELAY_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INSN_H #define _ASM_X86_INSN_H /* * x86 instruction analysis * * Copyright (C) IBM Corporation, 2009 */ /* insn_attr_t is defined in inat.h */ #include <asm/inat.h> struct insn_field { union { insn_value_t value; insn_byte_t bytes[4]; }; /* !0 if we've run insn_get_xxx() for this field */ unsigned char got; unsigned char nbytes; }; struct insn { struct insn_field prefixes; /* * Prefixes * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ struct insn_field vex_prefix; /* VEX prefix */ struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 * opcode.bytes[2]: opcode3 */ struct insn_field modrm; struct insn_field sib; struct insn_field displacement; union { struct insn_field immediate; struct insn_field moffset1; /* for 64bit MOV */ struct insn_field immediate1; /* for 64bit imm or off16/32 */ }; union { struct insn_field moffset2; /* for 64bit MOV */ struct insn_field immediate2; /* for 64bit imm or seg16 */ }; int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; unsigned char length; unsigned char x86_64; const insn_byte_t *kaddr; /* kernel address of insn to analyze */ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ const insn_byte_t *next_byte; }; #define MAX_INSN_SIZE 15 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_RM(modrm) ((modrm) & 0x07) #define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) #define X86_SIB_BASE(sib) ((sib) & 0x07) #define X86_REX_W(rex) ((rex) & 8) #define X86_REX_R(rex) ((rex) & 4) #define X86_REX_X(rex) ((rex) & 2) #define X86_REX_B(rex) ((rex) & 1) /* VEX bit flags */ #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ #define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); extern void insn_get_prefixes(struct insn *insn); extern void insn_get_opcode(struct insn *insn); extern void insn_get_modrm(struct insn *insn); extern void insn_get_sib(struct insn *insn); extern void insn_get_displacement(struct insn *insn); extern void insn_get_immediate(struct insn *insn); extern void insn_get_length(struct insn *insn); /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) { insn_get_modrm(insn); } /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); /* Init insn for kernel text */ static inline void kernel_insn_init(struct insn *insn, const void *kaddr, int buf_len) { #ifdef CONFIG_X86_64 insn_init(insn, kaddr, buf_len, 1); #else /* CONFIG_X86_32 */ insn_init(insn, kaddr, buf_len, 0); #endif } static inline int insn_is_avx(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); return (insn->vex_prefix.value != 0); } static inline int insn_is_evex(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); return (insn->vex_prefix.nbytes == 4); } static inline int insn_has_emulate_prefix(struct insn *insn) { return !!insn->emulate_prefix_size; } /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { return insn->opcode.got && insn->modrm.got && insn->sib.got && insn->displacement.got && insn->immediate.got; } static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); else /* EVEX */ return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX_P(insn->vex_prefix.bytes[1]); else return X86_VEX_P(insn->vex_prefix.bytes[2]); } /* Get the last prefix id from last prefix or VEX prefix */ static inline int insn_last_prefix_id(struct insn *insn) { if (insn_is_avx(insn)) return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ if (insn->prefixes.bytes[3]) return inat_get_last_prefix_id(insn->prefixes.bytes[3]); return 0; } /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { return insn->prefixes.nbytes; } static inline int insn_offset_vex_prefix(struct insn *insn) { return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; } static inline int insn_offset_opcode(struct insn *insn) { return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; } static inline int insn_offset_modrm(struct insn *insn) { return insn_offset_opcode(insn) + insn->opcode.nbytes; } static inline int insn_offset_sib(struct insn *insn) { return insn_offset_modrm(insn) + insn->modrm.nbytes; } static inline int insn_offset_displacement(struct insn *insn) { return insn_offset_sib(insn) + insn->sib.nbytes; } static inline int insn_offset_immediate(struct insn *insn) { return insn_offset_displacement(insn) + insn->displacement.nbytes; } /** * for_each_insn_prefix() -- Iterate prefixes in the instruction * @insn: Pointer to struct insn. * @idx: Index storage. * @prefix: Prefix byte. * * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix * and the index is stored in @idx (note that this @idx is just for a cursor, * do not change it.) * Since prefixes.nbytes can be bigger than 4 if some prefixes * are repeated, it cannot be used for looping over the prefixes. */ #define for_each_insn_prefix(insn, idx, prefix) \ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e /* * Intel SDM Vol.3A 6.8.3 states; * "Any single-step trap that would be delivered following the MOV to SS * instruction or POP to SS instruction (because EFLAGS.TF is 1) is * suppressed." * This function returns true if @insn is MOV SS or POP SS. On these * instructions, single stepping is suppressed. */ static inline int insn_masking_exception(struct insn *insn) { return insn->opcode.bytes[0] == POP_SS_OPCODE || (insn->opcode.bytes[0] == MOV_SREG_OPCODE && X86_MODRM_REG(insn->modrm.bytes[0]) == 2); } #endif /* _ASM_X86_INSN_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 /* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au> * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Code partially derived from nft_hash * Rewritten with rehash code from br_multicast plus single list * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H #include <linux/err.h> #include <linux/errno.h> #include <linux/jhash.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/rculist.h> #include <linux/bit_spinlock.h> #include <linux/rhashtable-types.h> /* * Objects in an rhashtable have an embedded struct rhash_head * which is linked into as hash chain from the hash table - or one * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has * the least significant bit set but otherwise stores the address of * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to * the chain. To avoid dereferencing this pointer without clearing * the bit first, we use an opaque 'struct rhash_lock_head *' for the * pointer stored in the bucket. This struct needs to be defined so * that rcu_dereference() works on it, but it has no content so a * cast is needed for it to be useful. This ensures it isn't * used by mistake with clearing the lock bit first. */ struct rhash_lock_head {}; /* Maximum chain length before rehash * * The maximum (not average) chain length grows with the size of the hash * table, at a rate of (log N)/(log log N). * * The value of 16 is selected so that even if the hash table grew to * 2^32 you would not expect the maximum chain length to exceed it * unless we are under attack (or extremely unlucky). * * As this limit is only to detect attacks, we don't need to set it to a * lower value as you'd need the chain length to vastly exceed 16 to have * any real effect on the system. */ #define RHT_ELASTICITY 16u /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; unsigned int nest; u32 hash_rnd; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; struct lockdep_map dep_map; struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* * NULLS_MARKER() expects a hash value with the low * bits mostly likely to be significant, and it discards * the msb. * We give it an address, in which the bottom bit is * always 0, and the msb might be significant. * So we shift the address down one bit to align with * expectations and avoid losing a significant bit. * * We never store the NULLS_MARKER in the hash table * itself as we need the lsb for locking. * Instead we store a NULL */ #define RHT_NULLS_MARKER(ptr) \ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) #define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = NULL) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (char *)he - ht->p.head_offset; } static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { return hash & (tbl->size - 1); } static inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, hash_rnd); else if (params.key_len) { unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else if (key_len & (sizeof(u32) - 1)) hash = jhash(key, key_len, hash_rnd); else hash = jhash2(key, key_len / sizeof(u32), hash_rnd); } else { unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else hash = jhash(key, key_len, hash_rnd); } return hash; } static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: ht->p.key_len, tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } /** * rht_grow_above_75 - returns true if nelems > 0.75 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_75(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_shrink_below_30(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && tbl->size > ht->p.min_size; } /** * rht_grow_above_100 - returns true if nelems > table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) > tbl->size && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_grow_above_max - returns true if table is above maximum * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) >= ht->max_elems; } #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { return 1; } #endif /* CONFIG_PROVE_LOCKING */ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); static inline void rhashtable_walk_start(struct rhashtable_iter *iter) { (void)rhashtable_walk_start_check(iter); } void *rhashtable_walk_next(struct rhashtable_iter *iter); void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg); void rhashtable_destroy(struct rhashtable *ht); struct rhash_lock_head __rcu **rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **__rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **rht_bucket_nested_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : &tbl->buckets[hash]; } /* * We lock a bucket by setting BIT(0) in the pointer - this is always * zero in real pointers. The NULLS mark is never stored in the bucket, * rather we store NULL if the bucket is empty. * bit_spin_locks do not handle contention well, but the whole point * of the hashtable design is to achieve minimum per-bucket contention. * A nested hash table might not have a bucket pointer. In that case * we cannot get a lock. For remove and replace the bucket cannot be * interesting and doesn't need locking. * For insert we allocate the bucket if this is the last bucket_table, * and then take the lock. * Sometimes we unlock a bucket by writing a new pointer there. In that * case we don't need to unlock, but we do need to reset state such as * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() * provides the same release semantics that bit_spin_unlock() provides, * this is safe. * When we write to a bucket without unlocking, we use rht_assign_locked(). */ static inline void rht_lock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt) { local_bh_disable(); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); } static inline void rht_lock_nested(struct bucket_table *tbl, struct rhash_lock_head __rcu **bucket, unsigned int subclass) { local_bh_disable(); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); } static inline void rht_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); local_bh_enable(); } static inline struct rhash_head *__rht_ptr( struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) { return (struct rhash_head *) ((unsigned long)p & ~BIT(0) ?: (unsigned long)RHT_NULLS_MARKER(bkt)); } /* * Where 'bkt' is a bucket and might be locked: * rht_ptr_rcu() dereferences that pointer and clears the lock bit. * rht_ptr() dereferences in a context where the bucket is locked. * rht_ptr_exclusive() dereferences in a context where exclusive * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( struct rhash_lock_head __rcu *const *bkt, struct bucket_table *tbl, unsigned int hash) { return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { if (rht_is_a_nulls(obj)) obj = NULL; rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); } static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); local_bh_enable(); } /** * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each_from(pos, head, tbl, hash) \ for (pos = head; \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each - iterate over hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash) /** * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ for (pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ rht_for_each_entry_from(tpos, pos, \ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @next: the &struct rhash_head to use as next in loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = next, \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ rht_for_each_entry_rcu_from(tpos, pos, \ rht_ptr_rcu(rht_bucket(tbl, hash)), \ tbl, hash, member) /** * rhl_for_each_rcu - iterate over rcu hash table list * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * @member: name of the &struct rlist_head within the hashable struct. * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ pos = rcu_dereference_raw(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { struct rhashtable *ht = arg->ht; const char *ptr = obj; return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } /* Internal function, do not use. */ static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); bkt = rht_bucket(tbl, hash); do { rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; return he; } /* An object might have been moved to a different hash chain, * while we walk along it - better check and retry. */ } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. */ smp_rmb(); tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; return NULL; } /** * rhashtable_lookup - search hash table * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. The first matching entry is returned. * * This must only be called under the RCU read lock. * * Returns the first entry on which the compare function returned true. */ static inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(ht, key, params); return he ? rht_obj(ht, he) : NULL; } /** * rhashtable_lookup_fast - search hash table, without RCU read lock * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. The first matching entry is returned. * * Only use this function when you have other mechanisms guaranteeing * that the object won't go away after the RCU read lock is released. * * Returns the first entry on which the compare function returned true. */ static inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { void *obj; rcu_read_lock(); obj = rhashtable_lookup(ht, key, params); rcu_read_unlock(); return obj; } /** * rhltable_lookup - search hash list table * @hlt: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. All matching entries are returned * in a list. * * This must only be called under the RCU read lock. * * Returns the list of entries that match the given key. */ static inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); return he ? container_of(he, struct rhlist_head, rhead) : NULL; } /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; unsigned int hash; int elasticity; void *data; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); elasticity = RHT_ELASTICITY; bkt = rht_bucket_insert(ht, tbl, hash); data = ERR_PTR(-ENOMEM); if (!bkt) goto out; pprev = NULL; rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: rht_unlock(tbl, bkt); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; elasticity--; if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head)))) { pprev = &head->next; continue; } data = rht_obj(ht, head); if (!rhlist) goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); plist = container_of(head, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt); } else rht_assign_unlock(tbl, bkt, obj); data = NULL; goto out; } if (elasticity <= 0) goto slow_path; data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; /* Inserting at head of list makes unlocking free. */ head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { struct rhlist_head *list; list = container_of(obj, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, NULL); } atomic_inc(&ht->nelems); rht_assign_unlock(tbl, bkt, obj); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); data = NULL; out: rcu_read_unlock(); return data; out_unlock: rht_unlock(tbl, bkt); goto out; } /** * rhashtable_insert_fast - insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhltable_insert_key - insert object into hash list table * @hlt: hash list table * @key: the pointer to the key * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, params, true)); } /** * rhltable_insert - insert object into hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { const char *key = rht_obj(&hlt->ht, &list->rhead); key += params.key_offset; return rhltable_insert_key(hlt, key, list, params); } /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); void *ret; BUG_ON(ht->p.obj_hashfn); ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_fast(), but this function returns the * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ static inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); BUG_ON(ht->p.obj_hashfn); return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); } /** * rhashtable_lookup_insert_key - search and insert object to hash table * with explicit key * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Lookups may occur in parallel with hashtable mutations and resizing. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. * * Returns zero on success. */ static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; BUG_ON(!ht->p.obj_hashfn || !key); ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. */ static inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); if (he != obj) { struct rhlist_head __rcu **lpprev; pprev = &he->next; if (!rhlist) continue; do { lpprev = &list->next; list = rht_dereference_bucket(list->next, tbl, hash); } while (list && obj != &list->rhead); if (!list) continue; list = rht_dereference_bucket(list->next, tbl, hash); RCU_INIT_POINTER(*lpprev, list); err = 0; break; } obj = rht_dereference_bucket(obj->next, tbl, hash); err = 1; if (rhlist) { list = rht_dereference_bucket(list->next, tbl, hash); if (list) { RCU_INIT_POINTER(list->rhead.next, obj); obj = &list->rhead; err = 0; } } if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt); } else { rht_assign_unlock(tbl, bkt, obj); } goto unlocked; } rht_unlock(tbl, bkt); unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); err = 0; } return err; } /* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, rhlist)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhashtable_remove_fast - remove object from hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { return __rhashtable_remove_fast(ht, obj, params, false); } /** * rhltable_remove - remove object from hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ static inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ static inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; int err = -ENOENT; /* Minimally, the old and new objects must have same hash * (which should mean identifiers are the same). */ hash = rht_head_hashfn(ht, tbl, obj_old, params); if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); rht_unlock(tbl, bkt); } else { rht_assign_unlock(tbl, bkt, obj_new); } err = 0; goto unlocked; } rht_unlock(tbl, bkt); unlocked: return err; } /** * rhashtable_replace_fast - replace an object in hash table * @ht: hash table * @obj_old: pointer to hash head inside object being replaced * @obj_new: pointer to hash head inside object which is new * @params: hash table parameters * * Replacing an object doesn't affect the number of elements in the hash table * or bucket, so we don't need to worry about shrinking or expanding the * table here. * * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ static inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, obj_new, params)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhltable_walk_enter - Initialise an iterator * @hlt: Table to walk over * @iter: Hash table Iterator * * This function prepares a hash table walk. * * Note that if you restart a walk after rhashtable_walk_stop you * may see the same object twice. Also, you may miss objects if * there are removals in between rhashtable_walk_stop and the next * call to rhashtable_walk_start. * * For a completely stable walk you should construct your own data * structure outside the hash table. * * This function may be called from any process context, including * non-preemptable context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { return rhashtable_walk_enter(&hlt->ht, iter); } /** * rhltable_free_and_destroy - free elements and destroy hash list table * @hlt: the hash list table to destroy * @free_fn: callback to release resources of element * @arg: pointer passed to free_fn * * See documentation for rhashtable_free_and_destroy. */ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void (*free_fn)(void *ptr, void *arg), void *arg) { return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { return rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SEQ_FILE_NET_H__ #define __SEQ_FILE_NET_H__ #include <linux/seq_file.h> struct net; extern struct net init_net; struct seq_net_private { #ifdef CONFIG_NET_NS struct net *net; #endif }; static inline struct net *seq_file_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS return ((struct seq_net_private *)seq->private)->net; #else return &init_net; #endif } /* * This one is needed for proc_create_net_single since net is stored directly * in private not as a struct i.e. seq_file_net can't be used. */ static inline struct net *seq_file_single_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS return (struct net *)seq->private; #else return &init_net; #endif } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_NETLINK_H #define __NET_NETLINK_H #include <linux/types.h> #include <linux/netlink.h> #include <linux/jiffies.h> #include <linux/in6.h> /* ======================================================================== * Netlink Messages and Attributes Interface (As Seen On TV) * ------------------------------------------------------------------------ * Messages Interface * ------------------------------------------------------------------------ * * Message Format: * <--- nlmsg_total_size(payload) ---> * <-- nlmsg_msg_size(payload) -> * +----------+- - -+-------------+- - -+-------- - - * | nlmsghdr | Pad | Payload | Pad | nlmsghdr * +----------+- - -+-------------+- - -+-------- - - * nlmsg_data(nlh)---^ ^ * nlmsg_next(nlh)-----------------------+ * * Payload Format: * <---------------------- nlmsg_len(nlh) ---------------------> * <------ hdrlen ------> <- nlmsg_attrlen(nlh, hdrlen) -> * +----------------------+- - -+--------------------------------+ * | Family Header | Pad | Attributes | * +----------------------+- - -+--------------------------------+ * nlmsg_attrdata(nlh, hdrlen)---^ * * Data Structures: * struct nlmsghdr netlink message header * * Message Construction: * nlmsg_new() create a new netlink message * nlmsg_put() add a netlink message to an skb * nlmsg_put_answer() callback based nlmsg_put() * nlmsg_end() finalize netlink message * nlmsg_get_pos() return current position in message * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction * nlmsg_free() free a netlink message * * Message Sending: * nlmsg_multicast() multicast message to several groups * nlmsg_unicast() unicast a message to a single socket * nlmsg_notify() send notification message * * Message Length Calculations: * nlmsg_msg_size(payload) length of message w/o padding * nlmsg_total_size(payload) length of message w/ padding * nlmsg_padlen(payload) length of padding at tail * * Message Payload Access: * nlmsg_data(nlh) head of message payload * nlmsg_len(nlh) length of message payload * nlmsg_attrdata(nlh, hdrlen) head of attributes data * nlmsg_attrlen(nlh, hdrlen) length of attributes data * * Message Parsing: * nlmsg_ok(nlh, remaining) does nlh fit into remaining bytes? * nlmsg_next(nlh, remaining) get next netlink message * nlmsg_parse() parse attributes of a message * nlmsg_find_attr() find an attribute in a message * nlmsg_for_each_msg() loop over all messages * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * * Misc: * nlmsg_report() report back to application? * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ * * Attribute Format: * <------- nla_total_size(payload) -------> * <---- nla_attr_size(payload) -----> * +----------+- - -+- - - - - - - - - +- - -+-------- - - * | Header | Pad | Payload | Pad | Header * +----------+- - -+- - - - - - - - - +- - -+-------- - - * <- nla_len(nla) -> ^ * nla_data(nla)----^ | * nla_next(nla)-----------------------------' * * Data Structures: * struct nlattr netlink attribute header * * Attribute Construction: * nla_reserve(skb, type, len) reserve room for an attribute * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr * nla_put(skb, type, len, data) add attribute to skb * nla_put_nohdr(skb, len, data) add attribute w/o hdr * nla_append(skb, len, data) append data to skb * * Attribute Construction for Basic Types: * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb * nla_put_u64_64bit(skb, type, * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb * nla_put_s64(skb, type, value, * padattr) add s64 attribute to skb * nla_put_string(skb, type, str) add string attribute to skb * nla_put_flag(skb, type) add flag attribute to skb * nla_put_msecs(skb, type, jiffies, * padattr) add msecs attribute to skb * nla_put_in_addr(skb, type, addr) add IPv4 address attribute to skb * nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb * * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding * nla_total_size(payload) length of attribute w/ padding * nla_padlen(payload) length of padding * * Attribute Payload Access: * nla_data(nla) head of attribute payload * nla_len(nla) length of attribute payload * * Attribute Payload Access for Basic Types: * nla_get_u8(nla) get payload for a u8 attribute * nla_get_u16(nla) get payload for a u16 attribute * nla_get_u32(nla) get payload for a u32 attribute * nla_get_u64(nla) get payload for a u64 attribute * nla_get_s8(nla) get payload for a s8 attribute * nla_get_s16(nla) get payload for a s16 attribute * nla_get_s32(nla) get payload for a s32 attribute * nla_get_s64(nla) get payload for a s64 attribute * nla_get_flag(nla) return 1 if flag is true * nla_get_msecs(nla) get payload for a msecs attribute * * Attribute Misc: * nla_memcpy(dest, nla, count) copy attribute into memory * nla_memcmp(nla, data, size) compare attribute with memory area * nla_strlcpy(dst, nla, size) copy attribute to a sized string * nla_strcmp(nla, str) compare attribute with string * * Attribute Parsing: * nla_ok(nla, remaining) does nla fit into remaining bytes? * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes * nla_validate_nested() validate a stream of nested attributes * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= */ /** * Standard attribute types to specify validation policy */ enum { NLA_UNSPEC, NLA_U8, NLA_U16, NLA_U32, NLA_U64, NLA_STRING, NLA_FLAG, NLA_MSECS, NLA_NESTED, NLA_NESTED_ARRAY, NLA_NUL_STRING, NLA_BINARY, NLA_S8, NLA_S16, NLA_S32, NLA_S64, NLA_BITFIELD32, NLA_REJECT, __NLA_TYPE_MAX, }; #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) struct netlink_range_validation { u64 min, max; }; struct netlink_range_validation_signed { s64 min, max; }; enum nla_policy_validation { NLA_VALIDATE_NONE, NLA_VALIDATE_RANGE, NLA_VALIDATE_RANGE_WARN_TOO_LONG, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, NLA_VALIDATE_MASK, NLA_VALIDATE_RANGE_PTR, NLA_VALIDATE_FUNCTION, }; /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC * @validation_type: type of attribute validation done in addition to * type-specific validation (e.g. range, function call), see * &enum nla_policy_validation * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be * accessible by attribute type up to the highest identifier to be expected. * * Meaning of `len' field: * NLA_STRING Maximum length of string * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload * (but see also below with the validation type) * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if * nested_policy is also used, for the max attr * number in the nested policy. * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, * NLA_S32, NLA_S64, * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" * NLA_BITFIELD32 Unused * NLA_REJECT Unused * All other Minimum length of attribute payload * * Meaning of validation union: * NLA_BITFIELD32 This is a 32-bit bitmap/bitselector attribute and * `bitfield32_valid' is the u32 value of valid flags * NLA_REJECT This attribute is always rejected and `reject_message' * may point to a string to report as the error instead * of the generic one in extended ACK. * NLA_NESTED `nested_policy' to a nested policy to validate, must * also set `len' to the max attribute number. Use the * provided NLA_POLICY_NESTED() macro. * Note that nla_parse() will validate, but of course not * parse, the nested sub-policies. * NLA_NESTED_ARRAY `nested_policy' points to a nested policy to validate, * must also set `len' to the max attribute number. Use * the provided NLA_POLICY_NESTED_ARRAY() macro. * The difference to NLA_NESTED is the structure: * NLA_NESTED has the nested attributes directly inside * while an array has the nested attributes at another * level down and the attribute types directly in the * nesting don't matter. * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64, * NLA_S8, * NLA_S16, * NLA_S32, * NLA_S64 The `min' and `max' fields are used depending on the * validation_type field, if that is min/max/range then * the min, max or both are used (respectively) to check * the value of the integer attribute. * Note that in the interest of code simplicity and * struct size both limits are s16, so you cannot * enforce a range that doesn't fall within the range * of s16 - do that as usual in the code instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64 If the validation_type field instead is set to * NLA_VALIDATE_RANGE_PTR, `range' must be a pointer * to a struct netlink_range_validation that indicates * the min/max values. * Use NLA_POLICY_FULL_RANGE(). * NLA_S8, * NLA_S16, * NLA_S32, * NLA_S64 If the validation_type field instead is set to * NLA_VALIDATE_RANGE_PTR, `range_signed' must be a * pointer to a struct netlink_range_validation_signed * that indicates the min/max values. * Use NLA_POLICY_FULL_RANGE_SIGNED(). * * NLA_BINARY If the validation type is like the ones for integers * above, then the min/max length (not value like for * integers) of the attribute is enforced. * * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: * NLA_BINARY Validation function called for the attribute. * All other Unused - but note that it's a union * * Example: * * static const u32 myvalidflags = 0xff231023; * * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)), * [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags), * }; */ struct nla_policy { u8 type; u8 validation_type; u16 len; union { const u32 bitfield32_valid; const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; struct netlink_range_validation *range; struct netlink_range_validation_signed *range_signed; struct { s16 min, max; }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); /* This entry is special, and used for the attribute at index 0 * only, and specifies special data about the policy, namely it * specifies the "boundary type" where strict length validation * starts for any attribute types >= this value, also, strict * nesting validation starts here. * * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to * get the previous pure { .len = xyz } behaviour. The advantage * of this is that types not specified in the policy will be * rejected. * * For completely new families it should be set to 1 so that the * validation is enforced for all attributes. For existing ones * it should be set at least when new attributes are added to * the enum used by the policy, and be set to the new value that * was added to enforce strict validation from thereon. */ u16 strict_start_type; }; }; #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) #define _NLA_POLICY_NESTED(maxattr, policy) \ { .type = NLA_NESTED, .nested_policy = policy, .len = maxattr } #define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ { .type = NLA_NESTED_ARRAY, .nested_policy = policy, .len = maxattr } #define NLA_POLICY_NESTED(policy) \ _NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy) #define NLA_POLICY_NESTED_ARRAY(policy) \ _NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy) #define NLA_POLICY_BITFIELD32(valid) \ { .type = NLA_BITFIELD32, .bitfield32_valid = valid } #define __NLA_IS_UINT_TYPE(tp) \ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) #define __NLA_IS_SINT_TYPE(tp) \ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp) #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_SINT_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_SINT_TYPE(tp)) + tp) #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ __NLA_IS_SINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ tp != NLA_REJECT && \ tp != NLA_NESTED && \ tp != NLA_NESTED_ARRAY) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE, \ .min = _min, \ .max = _max \ } #define NLA_POLICY_FULL_RANGE(tp, _range) { \ .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range = _range, \ } #define NLA_POLICY_FULL_RANGE_SIGNED(tp, _range) { \ .type = NLA_ENSURE_SINT_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range_signed = _range, \ } #define NLA_POLICY_MIN(tp, _min) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MIN, \ .min = _min, \ } #define NLA_POLICY_MAX(tp, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ } #define NLA_POLICY_MASK(tp, _mask) { \ .type = NLA_ENSURE_UINT_TYPE(tp), \ .validation_type = NLA_VALIDATE_MASK, \ .mask = _mask, \ } #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ .validation_type = NLA_VALIDATE_FUNCTION, \ .validate = fn, \ .len = __VA_ARGS__ + 0, \ } #define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len) #define NLA_POLICY_EXACT_LEN_WARN(_len) { \ .type = NLA_BINARY, \ .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \ .min = _len, \ .max = _len \ } #define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len) /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request * @nl_net: Network namespace * @portid: Netlink PORTID of requesting application * @skip_notify: Skip netlink notifications to user space * @skip_notify_kernel: Skip selected in-kernel notifications */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; u8 skip_notify:1, skip_notify_kernel:1; }; /** * enum netlink_validation - netlink message/attribute validation levels * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about * extra data at the end of the message, attributes being longer than * they should be, or unknown attributes being present. * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing. * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict(). * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy. * This can safely be set by the kernel when the given policy has no * NLA_UNSPEC anymore, and can thus be used to ensure policy entries * are enforced going forward. * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g. * U8, U16, U32 must have exact size, etc.) * @NL_VALIDATE_NESTED: Check that NLA_F_NESTED is set for NLA_NESTED(_ARRAY) * and unset for other policies. */ enum netlink_validation { NL_VALIDATE_LIBERAL = 0, NL_VALIDATE_TRAILING = BIT(0), NL_VALIDATE_MAXTYPE = BIT(1), NL_VALIDATE_UNSPEC = BIT(2), NL_VALIDATE_STRICT_ATTRS = BIT(3), NL_VALIDATE_NESTED = BIT(4), }; #define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\ NL_VALIDATE_MAXTYPE) #define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\ NL_VALIDATE_MAXTYPE |\ NL_VALIDATE_UNSPEC |\ NL_VALIDATE_STRICT_ATTRS |\ NL_VALIDATE_NESTED) int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); int __nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack); int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr); void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen); struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr); void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen); void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr); void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr); int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_append(struct sk_buff *skb, int attrlen, const void *data); /************************************************************************** * Netlink Messages **************************************************************************/ /** * nlmsg_msg_size - length of netlink message not including padding * @payload: length of message payload */ static inline int nlmsg_msg_size(int payload) { return NLMSG_HDRLEN + payload; } /** * nlmsg_total_size - length of netlink message including padding * @payload: length of message payload */ static inline int nlmsg_total_size(int payload) { return NLMSG_ALIGN(nlmsg_msg_size(payload)); } /** * nlmsg_padlen - length of padding at the message's tail * @payload: length of message payload */ static inline int nlmsg_padlen(int payload) { return nlmsg_total_size(payload) - nlmsg_msg_size(payload); } /** * nlmsg_data - head of message payload * @nlh: netlink message header */ static inline void *nlmsg_data(const struct nlmsghdr *nlh) { return (unsigned char *) nlh + NLMSG_HDRLEN; } /** * nlmsg_len - length of message payload * @nlh: netlink message header */ static inline int nlmsg_len(const struct nlmsghdr *nlh) { return nlh->nlmsg_len - NLMSG_HDRLEN; } /** * nlmsg_attrdata - head of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */ static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, int hdrlen) { unsigned char *data = nlmsg_data(nlh); return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); } /** * nlmsg_attrlen - length of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */ static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) { return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen); } /** * nlmsg_ok - check if the netlink message fits into the remaining bytes * @nlh: netlink message header * @remaining: number of bytes remaining in message stream */ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) { return (remaining >= (int) sizeof(struct nlmsghdr) && nlh->nlmsg_len >= sizeof(struct nlmsghdr) && nlh->nlmsg_len <= remaining); } /** * nlmsg_next - next netlink message in message stream * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * * Returns the next netlink message in the message stream and * decrements remaining by the size of the current message. */ static inline struct nlmsghdr * nlmsg_next(const struct nlmsghdr *nlh, int *remaining) { int totlen = NLMSG_ALIGN(nlh->nlmsg_len); *remaining -= totlen; return (struct nlmsghdr *) ((unsigned char *) nlh + totlen); } /** * nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be rejected, policy must be specified, attributes * will be validated in the strictest way possible. * * Returns 0 on success or a negative error code. */ static inline int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_STRICT, extack); } /** * nla_parse_deprecated - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be ignored and attributes from the policy are not * always strictly validated (only for new attributes). * * Returns 0 on success or a negative error code. */ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be rejected as well as trailing data, but the * policy is not completely strictly validated (only for new attributes). * * Returns 0 on success or a negative error code. */ static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_DEPRECATED_STRICT, extack); } /** * __nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() */ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; } return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), policy, validate, extack); } /** * nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @extack: extended ACK report struct * * See nla_parse() */ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, NL_VALIDATE_STRICT, extack); } /** * nlmsg_parse_deprecated - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @extack: extended ACK report struct * * See nla_parse_deprecated() */ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * nlmsg_parse_deprecated_strict - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @extack: extended ACK report struct * * See nla_parse_deprecated_strict() */ static inline int nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, NL_VALIDATE_DEPRECATED_STRICT, extack); } /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header * @hdrlen: length of familiy specific header * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. */ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, int hdrlen, int attrtype) { return nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), attrtype); } /** * nla_validate_deprecated - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in liberal mode. * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ static inline int nla_validate_deprecated(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in strict mode. * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_STRICT, extack); } /** * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, int hdrlen, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; return __nla_validate(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * nlmsg_report - need to report back to application? * @nlh: netlink message header * * Returns 1 if a report back to the application is requested. */ static inline int nlmsg_report(const struct nlmsghdr *nlh) { return !!(nlh->nlmsg_flags & NLM_F_ECHO); } /** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header * @hdrlen: length of familiy specific header * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \ nlmsg_attrlen(nlh, hdrlen), rem) /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in * @portid: netlink PORTID of requesting application * @seq: sequence number of message * @type: message type * @payload: length of message payload * @flags: message flags * * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int payload, int flags) { if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) return NULL; return __nlmsg_put(skb, portid, seq, type, payload, flags); } /** * nlmsg_put_answer - Add a new callback based netlink message to an skb * @skb: socket buffer to store message in * @cb: netlink callback * @type: message type * @payload: length of message payload * @flags: message flags * * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, struct netlink_callback *cb, int type, int payload, int flags) { return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, type, payload, flags); } /** * nlmsg_new - Allocate a new netlink message * @payload: size of the message payload * @flags: the type of memory to allocate. * * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known * and a good default is needed. */ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) { return alloc_skb(nlmsg_total_size(payload), flags); } /** * nlmsg_end - Finalize a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header * * Corrects the netlink message header to include the appeneded * attributes. Only necessary if attributes have been added to * the message. */ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) { nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; } /** * nlmsg_get_pos - return current position in netlink message * @skb: socket buffer the message is stored in * * Returns a pointer to the current tail of the message. */ static inline void *nlmsg_get_pos(struct sk_buff *skb) { return skb_tail_pointer(skb); } /** * nlmsg_trim - Trim message to a mark * @skb: socket buffer the message is stored in * @mark: mark to trim to * * Trims the message to the provided mark. */ static inline void nlmsg_trim(struct sk_buff *skb, const void *mark) { if (mark) { WARN_ON((unsigned char *) mark < skb->data); skb_trim(skb, (unsigned char *) mark - skb->data); } } /** * nlmsg_cancel - Cancel construction of a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header * * Removes the complete netlink message including all * attributes from the socket buffer again. */ static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { nlmsg_trim(skb, nlh); } /** * nlmsg_free - free a netlink message * @skb: socket buffer of netlink message */ static inline void nlmsg_free(struct sk_buff *skb) { kfree_skb(skb); } /** * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; err = netlink_broadcast(sk, skb, portid, group, flags); if (err > 0) err = 0; return err; } /** * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer * @portid: netlink portid of the destination socket */ static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid) { int err; err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT); if (err > 0) err = 0; return err; } /** * nlmsg_for_each_msg - iterate over a stream of messages * @pos: loop counter, set to current message * @head: head of message stream * @len: length of message stream * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_msg(pos, head, len, rem) \ for (pos = head, rem = len; \ nlmsg_ok(pos, rem); \ pos = nlmsg_next(pos, &(rem))) /** * nl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @nlh: netlink message header to write the flag to * * This function checks if the sequence (generation) number changed during dump * and if it did, advertises it in the netlink message header. * * The correct way to use it is to set cb->seq to the generation counter when * all locks for dumping have been acquired, and then call this function for * each message that is generated. * * Note that due to initialisation concerns, 0 is an invalid sequence number * and must not be used by code that uses this functionality. */ static inline void nl_dump_check_consistent(struct netlink_callback *cb, struct nlmsghdr *nlh) { if (cb->prev_seq && cb->seq != cb->prev_seq) nlh->nlmsg_flags |= NLM_F_DUMP_INTR; cb->prev_seq = cb->seq; } /************************************************************************** * Netlink Attributes **************************************************************************/ /** * nla_attr_size - length of attribute not including padding * @payload: length of payload */ static inline int nla_attr_size(int payload) { return NLA_HDRLEN + payload; } /** * nla_total_size - total length of attribute including padding * @payload: length of payload */ static inline int nla_total_size(int payload) { return NLA_ALIGN(nla_attr_size(payload)); } /** * nla_padlen - length of padding at the tail of attribute * @payload: length of payload */ static inline int nla_padlen(int payload) { return nla_total_size(payload) - nla_attr_size(payload); } /** * nla_type - attribute type * @nla: netlink attribute */ static inline int nla_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; } /** * nla_data - head of payload * @nla: netlink attribute */ static inline void *nla_data(const struct nlattr *nla) { return (char *) nla + NLA_HDRLEN; } /** * nla_len - length of payload * @nla: netlink attribute */ static inline int nla_len(const struct nlattr *nla) { return nla->nla_len - NLA_HDRLEN; } /** * nla_ok - check if the netlink attribute fits into the remaining bytes * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream */ static inline int nla_ok(const struct nlattr *nla, int remaining) { return remaining >= (int) sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } /** * nla_next - next netlink attribute in attribute stream * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream * * Returns the next netlink attribute in the attribute stream and * decrements remaining by the size of the current attribute. */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { unsigned int totlen = NLA_ALIGN(nla->nla_len); *remaining -= totlen; return (struct nlattr *) ((char *) nla + totlen); } /** * nla_find_nested - find attribute in a set of nested attributes * @nla: attribute containing the nested attributes * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. */ static inline struct nlattr * nla_find_nested(const struct nlattr *nla, int attrtype) { return nla_find(nla_data(nla), nla_len(nla), attrtype); } /** * nla_parse_nested - parse nested attributes * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nlattr *nla, const struct nla_policy *policy, struct netlink_ext_ack *extack) { if (!(nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing"); return -EINVAL; } return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, NL_VALIDATE_STRICT, extack); } /** * nla_parse_nested_deprecated - parse nested attributes * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated() */ static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype, const struct nlattr *nla, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_put_u8 - Add a u8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { /* temporary variables to work around GCC PR81715 with asan-stack=1 */ u8 tmp = value; return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** * nla_put_u16 - Add a u16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { u16 tmp = value; return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** * nla_put_be16 - Add a __be16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { __be16 tmp = value; return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** * nla_put_net16 - Add 16-bit network byte order netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { __be16 tmp = value; return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** * nla_put_le16 - Add a __le16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { __le16 tmp = value; return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** * nla_put_u32 - Add a u32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { u32 tmp = value; return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** * nla_put_be32 - Add a __be32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { __be32 tmp = value; return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** * nla_put_net32 - Add 32-bit network byte order netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { __be32 tmp = value; return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** * nla_put_le32 - Add a __le32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { __le32 tmp = value; return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** * nla_put_u64_64bit - Add a u64 netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { u64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { __be64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { __be64 tmp = value; return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } /** * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { __le64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** * nla_put_s8 - Add a s8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { s8 tmp = value; return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** * nla_put_s16 - Add a s16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { s16 tmp = value; return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** * nla_put_s32 - Add a s32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { s32 tmp = value; return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** * nla_put_s64 - Add a s64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { s64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** * nla_put_string - Add a string netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @str: NUL terminated string */ static inline int nla_put_string(struct sk_buff *skb, int attrtype, const char *str) { return nla_put(skb, attrtype, strlen(str) + 1, str); } /** * nla_put_flag - Add a flag netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type */ static inline int nla_put_flag(struct sk_buff *skb, int attrtype) { return nla_put(skb, attrtype, 0, NULL); } /** * nla_put_msecs - Add a msecs netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @njiffies: number of jiffies to convert to msecs * @padattr: attribute type for the padding */ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, unsigned long njiffies, int padattr) { u64 tmp = jiffies_to_msecs(njiffies); return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** * nla_put_in_addr - Add an IPv4 address netlink attribute to a socket * buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @addr: IPv4 address */ static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { __be32 tmp = addr; return nla_put_be32(skb, attrtype, tmp); } /** * nla_put_in6_addr - Add an IPv6 address netlink attribute to a socket * buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @addr: IPv6 address */ static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype, const struct in6_addr *addr) { return nla_put(skb, attrtype, sizeof(*addr), addr); } /** * nla_put_bitfield32 - Add a bitfield32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: value carrying bits * @selector: selector of valid bits */ static inline int nla_put_bitfield32(struct sk_buff *skb, int attrtype, __u32 value, __u32 selector) { struct nla_bitfield32 tmp = { value, selector, }; return nla_put(skb, attrtype, sizeof(tmp), &tmp); } /** * nla_get_u32 - return payload of u32 attribute * @nla: u32 netlink attribute */ static inline u32 nla_get_u32(const struct nlattr *nla) { return *(u32 *) nla_data(nla); } /** * nla_get_be32 - return payload of __be32 attribute * @nla: __be32 netlink attribute */ static inline __be32 nla_get_be32(const struct nlattr *nla) { return *(__be32 *) nla_data(nla); } /** * nla_get_le32 - return payload of __le32 attribute * @nla: __le32 netlink attribute */ static inline __le32 nla_get_le32(const struct nlattr *nla) { return *(__le32 *) nla_data(nla); } /** * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute */ static inline u16 nla_get_u16(const struct nlattr *nla) { return *(u16 *) nla_data(nla); } /** * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute */ static inline __be16 nla_get_be16(const struct nlattr *nla) { return *(__be16 *) nla_data(nla); } /** * nla_get_le16 - return payload of __le16 attribute * @nla: __le16 netlink attribute */ static inline __le16 nla_get_le16(const struct nlattr *nla) { return *(__le16 *) nla_data(nla); } /** * nla_get_u8 - return payload of u8 attribute * @nla: u8 netlink attribute */ static inline u8 nla_get_u8(const struct nlattr *nla) { return *(u8 *) nla_data(nla); } /** * nla_get_u64 - return payload of u64 attribute * @nla: u64 netlink attribute */ static inline u64 nla_get_u64(const struct nlattr *nla) { u64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute */ static inline __be64 nla_get_be64(const struct nlattr *nla) { __be64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_le64 - return payload of __le64 attribute * @nla: __le64 netlink attribute */ static inline __le64 nla_get_le64(const struct nlattr *nla) { return *(__le64 *) nla_data(nla); } /** * nla_get_s32 - return payload of s32 attribute * @nla: s32 netlink attribute */ static inline s32 nla_get_s32(const struct nlattr *nla) { return *(s32 *) nla_data(nla); } /** * nla_get_s16 - return payload of s16 attribute * @nla: s16 netlink attribute */ static inline s16 nla_get_s16(const struct nlattr *nla) { return *(s16 *) nla_data(nla); } /** * nla_get_s8 - return payload of s8 attribute * @nla: s8 netlink attribute */ static inline s8 nla_get_s8(const struct nlattr *nla) { return *(s8 *) nla_data(nla); } /** * nla_get_s64 - return payload of s64 attribute * @nla: s64 netlink attribute */ static inline s64 nla_get_s64(const struct nlattr *nla) { s64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ static inline int nla_get_flag(const struct nlattr *nla) { return !!nla; } /** * nla_get_msecs - return payload of msecs attribute * @nla: msecs netlink attribute * * Returns the number of milliseconds in jiffies. */ static inline unsigned long nla_get_msecs(const struct nlattr *nla) { u64 msecs = nla_get_u64(nla); return msecs_to_jiffies((unsigned long) msecs); } /** * nla_get_in_addr