1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMU_NOTIFIER_H #define _LINUX_MMU_NOTIFIER_H #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm_types.h> #include <linux/mmap_lock.h> #include <linux/srcu.h> #include <linux/interval_tree.h> struct mmu_notifier_subscriptions; struct mmu_notifier; struct mmu_notifier_range; struct mmu_interval_notifier; /** * enum mmu_notifier_event - reason for the mmu notifier callback * @MMU_NOTIFY_UNMAP: either munmap() that unmap the range or a mremap() that * move the range * * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this like * madvise() or replacing a page by another one, ...). * * @MMU_NOTIFY_PROTECTION_VMA: update is due to protection change for the range * ie using the vma access permission (vm_page_prot) to update the whole range * is enough no need to inspect changes to the CPU page table (mprotect() * syscall) * * @MMU_NOTIFY_PROTECTION_PAGE: update is due to change in read/write flag for * pages in the range so to mirror those changes the user must inspect the CPU * page table (from the end callback). 
* * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still same page and same * access flags). User should soft dirty the page in the end callback to make * sure that anyone relying on soft dirtyness catch pages that might be written * through non CPU mappings. * * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal * that the mm refcount is zero and the range is no longer accessible. * * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal * a device driver to possibly ignore the invalidation if the * migrate_pgmap_owner field matches the driver's device private pgmap owner. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, MMU_NOTIFY_CLEAR, MMU_NOTIFY_PROTECTION_VMA, MMU_NOTIFY_PROTECTION_PAGE, MMU_NOTIFY_SOFT_DIRTY, MMU_NOTIFY_RELEASE, MMU_NOTIFY_MIGRATE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is * being destroyed by exit_mmap, always before all pages are * freed. This can run concurrently with other mmu notifier * methods (the ones invoked outside the mm context) and it * should tear down all secondary mmu mappings and freeze the * secondary mmu. If this method isn't implemented you've to * be sure that nothing could possibly write to the pages * through the secondary mmu by the time the last thread with * tsk->mm == mm exits. 
* * As side note: the pages freed after ->release returns could * be immediately reallocated by the gart at an alias physical * address with a different cache model, so if ->release isn't * implemented because all _software_ driven memory accesses * through the secondary mmu are terminated by the time the * last thread of this mm quits, you've also to be sure that * speculative _hardware_ operations can't allocate dirty * cachelines in the cpu that could not be snooped and made * coherent with the other read and write operations happening * through the gart alias address, so leading to memory * corruption. */ void (*release)(struct mmu_notifier *subscription, struct mm_struct *mm); /* * clear_flush_young is called after the VM is * test-and-clearing the young/accessed bitflag in the * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. * Start-end is necessary in case the secondary MMU is mapping the page * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * clear_young is a lightweight version of clear_flush_young. Like the * latter, it is supposed to test-and-clear the young/accessed bitflag * in the secondary pte, but it may omit flushing the secondary tlb. */ int (*clear_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * test_young is called to check the young/accessed bitflag in * the secondary pte. This is used to know if the page is * frequently used without actually clearing the flag or tearing * down the secondary mapping on the page. */ int (*test_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long address); /* * change_pte is called in cases that pte mapping to page is changed: * for example, when ksm remaps pte to point to a new shared page. 
*/ void (*change_pte)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long address, pte_t pte); /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). * * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. Either way the * establishment of sptes is forbidden in the range passed to * invalidate_range_begin/end for the whole duration of the * invalidate_range_begin/end critical section. * * invalidate_range_start() is called when all pages in the * range are still mapped and have at least a refcount of one. * * invalidate_range_end() is called when all pages in the * range have been unmapped and the pages have been freed by * the VM. * * The VM will remove the page table entries and potentially * the page between invalidate_range_start() and * invalidate_range_end(). If the page must not be freed * because of pending I/O or other circumstances then the * invalidate_range_start() callback (or the initial mapping * by the driver) must make sure that the refcount is kept * elevated. * * If the driver increases the refcount when the pages are * initially mapped into an address space then either * invalidate_range_start() or invalidate_range_end() may * decrease the refcount. If the refcount is decreased on * invalidate_range_start() then the VM can free pages as page * table entries are removed. If the refcount is only * droppped on invalidate_range_end() then the driver itself * will drop the last refcount but it must take care to flush * any secondary tlb before doing the final free on the * page. 
Pages will no longer be referenced by the linux * address space but may still be referenced by sptes until * the last refcount is dropped. * * If blockable argument is set to false then the callback cannot * sleep and has to return with -EAGAIN if sleeping would be required. * 0 should be returned otherwise. Please note that notifiers that can * fail invalidate_range_start are not allowed to implement * invalidate_range_end, as there is no mechanism for informing the * notifier that its start failed. */ int (*invalidate_range_start)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); void (*invalidate_range_end)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); /* * invalidate_range() is either called between * invalidate_range_start() and invalidate_range_end() when the * VM has to free pages that where unmapped, but before the * pages are actually freed, or outside of _start()/_end() when * a (remote) TLB is necessary. * * If invalidate_range() is used to manage a non-CPU TLB with * shared page-tables, it not necessary to implement the * invalidate_range_start()/end() notifiers, as * invalidate_range() alread catches the points in time when an * external TLB range needs to be flushed. For more in depth * discussion on this see Documentation/vm/mmu_notifier.rst * * Note that this function might be called with just a sub-range * of what was passed to invalidate_range_start()/end(), if * called between those functions. */ void (*invalidate_range)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * These callbacks are used with the get/put interface to manage the * lifetime of the mmu_notifier memory. alloc_notifier() returns a new * notifier for use with the mm. * * free_notifier() is only called after the mmu_notifier has been * fully put, calls to any ops callback are prevented and no ops * callbacks are currently running. 
It is called from a SRCU callback * and cannot sleep. */ struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm); void (*free_notifier)(struct mmu_notifier *subscription); }; /* * The notifier chains are protected by mmap_lock and/or the reverse map * semaphores. Notifier chains are only changed when all reverse maps and * the mmap_lock locks are taken. * * Therefore notifier chains can only be traversed when either * * 1. mmap_lock is held. * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { struct hlist_node hlist; const struct mmu_notifier_ops *ops; struct mm_struct *mm; struct rcu_head rcu; unsigned int users; }; /** * struct mmu_interval_notifier_ops * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. */ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range, unsigned long cur_seq); }; struct mmu_interval_notifier { struct interval_tree_node interval_tree; const struct mmu_interval_notifier_ops *ops; struct mm_struct *mm; struct hlist_node deferred_item; unsigned long invalidate_seq; }; #ifdef CONFIG_MMU_NOTIFIER #ifdef CONFIG_LOCKDEP extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; #endif struct mmu_notifier_range { struct vm_area_struct *vma; struct mm_struct *mm; unsigned long start; unsigned long end; unsigned flags; enum mmu_notifier_event event; void *migrate_pgmap_owner; }; static inline int mm_has_notifiers(struct mm_struct *mm) { return unlikely(mm->notifier_subscriptions); } struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, struct mm_struct *mm); static inline struct mmu_notifier * mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm) 
{ struct mmu_notifier *ret; mmap_write_lock(mm); ret = mmu_notifier_get_locked(ops, mm); mmap_write_unlock(mm); return ret; } void mmu_notifier_put(struct mmu_notifier *subscription); void mmu_notifier_synchronize(void); extern int mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern int __mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *subscription, struct mm_struct *mm); unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub); int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); int mmu_interval_notifier_insert_locked( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence * @interval_sub - The subscription passed to invalidate * @cur_seq - The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call * mmu_interval_read_retry(). It updates the sequence number for later use by * mmu_interval_read_retry(). The provided cur_seq will always be odd. * * If the caller does not call mmu_interval_read_begin() or * mmu_interval_read_retry() then this call is not required. 
*/ static inline void mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, unsigned long cur_seq) { WRITE_ONCE(interval_sub->invalidate_seq, cur_seq); } /** * mmu_interval_read_retry - End a read side critical section against a VA range * interval_sub: The subscription * seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). * * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * * Returns true if an invalidation collided with this critical section, and * the caller should retry. */ static inline bool mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { return interval_sub->invalidate_seq != seq; } /** * mmu_interval_check_retry - Test if a collision has occurred * interval_sub: The subscription * seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() * and mmu_interval_read_retry(). A return of true indicates an invalidation * has collided with this critical region and a future * mmu_interval_read_retry() will return true. * * False is not reliable and only suggests a collision may not have * occured. It can be called many times and does not have to hold the user * provided lock. * * This call can be used as part of loops and other expensive operations to * expedite a retry. 
*/ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ return READ_ONCE(interval_sub->invalidate_seq) != seq; } extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r, bool only_end); extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end); extern bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); } static inline void mmu_notifier_release(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_release(mm); } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } static inline int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_young(mm, start, end); return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { if (mm_has_notifiers(mm)) return __mmu_notifier_test_young(mm, address); return 0; } static inline void 
mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { if (mm_has_notifiers(mm)) __mmu_notifier_change_pte(mm, address, pte); } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { might_sleep(); lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE; __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); } static inline int mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE; ret = __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); return ret; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { if (mmu_notifier_range_blockable(range)) might_sleep(); if (mm_has_notifiers(range->mm)) __mmu_notifier_invalidate_range_end(range, false); } static inline void mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) { if (mm_has_notifiers(range->mm)) __mmu_notifier_invalidate_range_end(range, true); } static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) __mmu_notifier_invalidate_range(mm, start, end); } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { mm->notifier_subscriptions = NULL; } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_subscriptions_destroy(mm); } static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned flags, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end) { range->vma = vma; 
range->event = event; range->mm = mm; range->start = start; range->end = end; range->flags = flags; } static inline void mmu_notifier_range_init_migrate( struct mmu_notifier_range *range, unsigned int flags, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end, void *pgmap) { mmu_notifier_range_init(range, MMU_NOTIFY_MIGRATE, flags, vma, mm, start, end); range->migrate_pgmap_owner = pgmap; } #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PAGE_SIZE); \ __young; \ }) #define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PMD_SIZE); \ __young; \ }) #define ptep_clear_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PAGE_SIZE); \ __young; \ }) #define pmdp_clear_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PMD_SIZE); \ __young; \ }) #define ptep_clear_flush_notify(__vma, __address, __ptep) \ ({ \ unsigned long ___addr = __address & PAGE_MASK; \ struct mm_struct *___mm = (__vma)->vm_mm; \ pte_t ___pte; \ \ ___pte = ptep_clear_flush(__vma, 
__address, __ptep); \ mmu_notifier_invalidate_range(___mm, ___addr, \ ___addr + PAGE_SIZE); \ \ ___pte; \ }) #define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ struct mm_struct *___mm = (__vma)->vm_mm; \ pmd_t ___pmd; \ \ ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ mmu_notifier_invalidate_range(___mm, ___haddr, \ ___haddr + HPAGE_PMD_SIZE); \ \ ___pmd; \ }) #define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \ struct mm_struct *___mm = (__vma)->vm_mm; \ pud_t ___pud; \ \ ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \ mmu_notifier_invalidate_range(___mm, ___haddr, \ ___haddr + HPAGE_PUD_SIZE); \ \ ___pud; \ }) /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU * pte invalidate must have already happened with a ptep_clear_flush() before * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is * required when we change both the protection of the mapping from read-only to * read-write and the pfn (like during copy on write page faults). Otherwise the * old page would remain mapped readonly in the secondary MMUs after the new * page is already writable by some CPU through the primary MMU. 
*/ #define set_pte_at_notify(__mm, __address, __ptep, __pte) \ ({ \ struct mm_struct *___mm = __mm; \ unsigned long ___address = __address; \ pte_t ___pte = __pte; \ \ mmu_notifier_change_pte(___mm, ___address, ___pte); \ set_pte_at(___mm, ___address, __ptep, ___pte); \ }) #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { unsigned long start; unsigned long end; }; static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, unsigned long start, unsigned long end) { range->start = start; range->end = end; } #define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ _mmu_notifier_range_init(range, start, end) #define mmu_notifier_range_init_migrate(range, flags, vma, mm, start, end, \ pgmap) \ _mmu_notifier_range_init(range, start, end) static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return true; } static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; } static inline void mmu_notifier_release(struct mm_struct *mm) { } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { return 0; } static inline void mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { } static inline int mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { return 0; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } static inline void mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) { } static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) { } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { } static inline void mmu_notifier_subscriptions_destroy(struct 
mm_struct *mm) { } #define mmu_notifier_range_update_to_read_only(r) false #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush #define set_pte_at_notify set_pte_at static inline void mmu_notifier_synchronize(void) { } #endif /* CONFIG_MMU_NOTIFIER */ #endif /* _LINUX_MMU_NOTIFIER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM fib #if !defined(_TRACE_FIB_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FIB_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <net/ip_fib.h> #include <linux/tracepoint.h> TRACE_EVENT(fib_table_lookup, TP_PROTO(u32 tb_id, const struct flowi4 *flp, const struct fib_nh_common *nhc, int err), TP_ARGS(tb_id, flp, nhc, err), TP_STRUCT__entry( __field( u32, tb_id ) __field( int, err ) __field( int, oif ) __field( int, iif ) __field( u8, proto ) __field( __u8, tos ) __field( __u8, scope ) __field( __u8, flags ) __array( __u8, src, 4 ) __array( __u8, dst, 4 ) __array( __u8, gw4, 4 ) __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) __dynamic_array(char, name, IFNAMSIZ ) ), TP_fast_assign( struct in6_addr in6_zero = {}; struct net_device *dev; struct in6_addr *in6; __be32 *p32; __entry->tb_id = tb_id; __entry->err = err; __entry->oif = flp->flowi4_oif; __entry->iif = flp->flowi4_iif; __entry->tos = flp->flowi4_tos; __entry->scope = flp->flowi4_scope; __entry->flags = flp->flowi4_flags; p32 = (__be32 *) __entry->src; *p32 = flp->saddr; p32 = (__be32 *) __entry->dst; *p32 = flp->daddr; __entry->proto = flp->flowi4_proto; if (__entry->proto == IPPROTO_TCP || __entry->proto == IPPROTO_UDP) { __entry->sport = ntohs(flp->fl4_sport); __entry->dport = ntohs(flp->fl4_dport); } else { __entry->sport = 0; __entry->dport = 0; } dev = nhc ? nhc->nhc_dev : NULL; __assign_str(name, dev ? 
dev->name : "-"); if (nhc) { if (nhc->nhc_gw_family == AF_INET) { p32 = (__be32 *) __entry->gw4; *p32 = nhc->nhc_gw.ipv4; in6 = (struct in6_addr *)__entry->gw6; *in6 = in6_zero; } else if (nhc->nhc_gw_family == AF_INET6) { p32 = (__be32 *) __entry->gw4; *p32 = 0; in6 = (struct in6_addr *)__entry->gw6; *in6 = nhc->nhc_gw.ipv6; } } else { p32 = (__be32 *) __entry->gw4; *p32 = 0; in6 = (struct in6_addr *)__entry->gw6; *in6 = in6_zero; } ), TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4/%pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, __get_str(name), __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. 
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
   SOFTWARE IS DISCLAIMED.
*/

#ifndef __HCI_CORE_H
#define __HCI_CORE_H

#include <linux/idr.h>
#include <linux/leds.h>
#include <linux/rculist.h>

#include <net/bluetooth/hci.h>
#include <net/bluetooth/hci_sock.h>

/* HCI priority */
#define HCI_PRIO_MAX	7

/* HCI Core structures */

/*
 * Remote-device attributes kept per inquiry cache entry; embedded in
 * struct inquiry_entry as its 'data' member.
 */
struct inquiry_data {
	bdaddr_t	bdaddr;
	__u8		pscan_rep_mode;
	__u8		pscan_period_mode;
	__u8		pscan_mode;
	__u8		dev_class[3];
	__le16		clock_offset;
	__s8		rssi;
	__u8		ssp_mode;
};

/*
 * One inquiry cache entry. It carries two list links: 'all' and 'list',
 * the latter used for either the "unknown" or the "resolve" list.
 */
struct inquiry_entry {
	struct list_head	all;		/* inq_cache.all */
	struct list_head	list;		/* unknown or resolve */
	enum {
		NAME_NOT_KNOWN,
		NAME_NEEDED,
		NAME_PENDING,
		NAME_KNOWN,
	} name_state;
	__u32			timestamp;
	struct inquiry_data	data;
};

/*
 * Discovery bookkeeping. struct hci_dev embeds one instance as
 * 'discovery'; discovery_init() initialises the three list heads below.
 */
struct discovery_state {
	int			type;
	enum {
		DISCOVERY_STOPPED,
		DISCOVERY_STARTING,
		DISCOVERY_FINDING,
		DISCOVERY_RESOLVING,
		DISCOVERY_STOPPING,
	} state;
	struct list_head	all;	/* All devices found during inquiry */
	struct list_head	unknown;	/* Name state not known */
	struct list_head	resolve;	/* Name needs to be resolved */
	__u32			timestamp;
	bdaddr_t		last_adv_addr;
	u8
last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; u8 last_adv_data[HCI_MAX_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; bool limited; s8 rssi; u16 uuid_count; u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ enum suspend_tasks { SUSPEND_PAUSE_DISCOVERY, SUSPEND_UNPAUSE_DISCOVERY, SUSPEND_PAUSE_ADVERTISING, SUSPEND_UNPAUSE_ADVERTISING, SUSPEND_SCAN_DISABLE, SUSPEND_SCAN_ENABLE, SUSPEND_DISCONNECTING, SUSPEND_POWERING_DOWN, SUSPEND_PREPARE_NOTIFIER, __SUSPEND_NUM_TASKS }; enum suspended_state { BT_RUNNING = 0, BT_SUSPEND_DISCONNECT, BT_SUSPEND_CONFIGURE_WAKE, }; struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int amp_num; unsigned int sco_num; unsigned int le_num; unsigned int le_num_slave; }; struct bdaddr_list { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; }; struct bdaddr_list_with_irk { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u8 peer_irk[16]; u8 local_irk[16]; }; struct bdaddr_list_with_flags { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u32 current_flags; }; enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP, HCI_CONN_FLAG_MAX }; #define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) /* Make sure number of flags doesn't exceed sizeof(current_flags) */ static_assert(HCI_CONN_FLAG_MAX < 32); struct bt_uuid { struct list_head list; u8 uuid[16]; u8 size; u8 svc_hint; }; struct blocked_key { struct list_head list; struct rcu_head rcu; u8 type; u8 val[16]; }; struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; u8 type; u8 val[16]; }; struct smp_ltk { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; u8 authenticated; u8 type; u8 enc_size; __le16 ediv; __le64 rand; u8 val[16]; }; struct smp_irk { struct list_head list; struct rcu_head rcu; bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; u8 val[16]; }; struct link_key { struct 
list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; }; struct oob_data { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u8 present; u8 hash192[16]; u8 rand192[16]; u8 hash256[16]; u8 rand256[16]; }; struct adv_info { struct list_head list; bool pending; __u8 instance; __u32 flags; __u16 timeout; __u16 remaining_time; __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; __s8 tx_power; bdaddr_t random_addr; bool rpa_expired; struct delayed_work rpa_expired_cb; }; #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 struct adv_pattern { struct list_head list; __u8 ad_type; __u8 offset; __u8 length; __u8 value[HCI_MAX_AD_LENGTH]; }; struct adv_monitor { struct list_head patterns; bool active; __u16 handle; }; #define HCI_MIN_ADV_MONITOR_HANDLE 1 #define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 #define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16 #define HCI_MAX_SHORT_NAME_LENGTH 10 /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) /* Default min/max age of connection information (1s/3s) */ #define DEFAULT_CONN_INFO_MIN_AGE 1000 #define DEFAULT_CONN_INFO_MAX_AGE 3000 /* Default authenticated payload timeout 30s */ #define DEFAULT_AUTH_PAYLOAD_TIMEOUT 0x0bb8 struct amp_assoc { __u16 len; __u16 offset; __u16 rem_len; __u16 len_so_far; __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; #define HCI_MAX_PAGES 3 struct hci_dev { struct list_head list; struct mutex lock; char name[8]; unsigned long flags; __u16 id; __u8 bus; __u8 dev_type; bdaddr_t bdaddr; bdaddr_t setup_addr; bdaddr_t public_addr; bdaddr_t random_addr; bdaddr_t static_addr; __u8 adv_addr_type; __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 
minor_class; __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; __u8 lmp_ver; __u16 manufacturer; __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; __u8 stored_max_keys; __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; __u8 le_adv_channel_map; __u16 le_adv_min_interval; __u16 le_adv_max_interval; __u8 le_scan_type; __u16 le_scan_interval; __u16 le_scan_window; __u16 le_scan_int_suspend; __u16 le_scan_window_suspend; __u16 le_scan_int_discovery; __u16 le_scan_window_discovery; __u16 le_scan_int_adv_monitor; __u16 le_scan_window_adv_monitor; __u16 le_scan_int_connect; __u16 le_scan_window_connect; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u16 le_def_tx_len; __u16 le_def_tx_time; __u16 le_max_tx_len; __u16 le_max_tx_time; __u16 le_max_rx_len; __u16 le_max_rx_time; __u8 le_max_key_size; __u8 le_min_key_size; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; __u16 auth_payload_timeout; __u8 min_enc_key_size; __u8 max_enc_key_size; __u8 pairing_opts; __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; __u16 devid_source; __u16 devid_vendor; __u16 devid_product; __u16 devid_version; __u8 def_page_scan_type; __u16 def_page_scan_int; __u16 def_page_scan_window; __u8 def_inq_scan_type; __u16 def_inq_scan_int; __u16 def_inq_scan_window; __u16 def_br_lsto; __u16 def_page_timeout; __u16 def_multi_adv_rotation_duration; __u16 def_le_autoconnect_timeout; __u16 pkt_type; __u16 esco_type; __u16 link_policy; __u16 link_mode; __u32 idle_timeout; __u16 sniff_min_interval; __u16 sniff_max_interval; __u8 amp_status; __u32 amp_total_bw; __u32 amp_max_bw; __u32 amp_min_latency; __u32 amp_max_pdu; __u8 amp_type; __u16 
amp_pal_cap; __u16 amp_assoc_size; __u32 amp_max_flush_to; __u32 amp_be_flush_to; struct amp_assoc loc_assoc; __u8 flow_ctl_mode; unsigned int auto_accept_delay; unsigned long quirks; atomic_t cmd_cnt; unsigned int acl_cnt; unsigned int sco_cnt; unsigned int le_cnt; unsigned int acl_mtu; unsigned int sco_mtu; unsigned int le_mtu; unsigned int acl_pkts; unsigned int sco_pkts; unsigned int le_pkts; __u16 block_len; __u16 block_mtu; __u16 num_blocks; __u16 block_cnt; unsigned long acl_last_tx; unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; __u8 le_rx_def_phys; struct workqueue_struct *workqueue; struct workqueue_struct *req_workqueue; struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; __u16 discov_timeout; struct delayed_work discov_off; struct delayed_work service_cache; struct delayed_work cmd_timer; struct work_struct rx_work; struct work_struct cmd_work; struct work_struct tx_work; struct work_struct discov_update; struct work_struct bg_scan_update; struct work_struct scan_update; struct work_struct connectable_update; struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; struct mutex req_lock; wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; struct sk_buff *req_skb; void *smp_data; void *smp_bredr_data; struct discovery_state discovery; int discovery_old_state; bool discovery_paused; int advertising_old_state; bool advertising_paused; struct notifier_block suspend_notifier; struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; bool suspended; u8 wake_reason; bdaddr_t wake_addr; u8 wake_addr_type; wait_queue_head_t suspend_wait_q; DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); struct hci_conn_hash conn_hash; struct list_head 
mgmt_pending; struct list_head blacklist; struct list_head whitelist; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; struct list_head identity_resolving_keys; struct list_head remote_oob_data; struct list_head le_white_list; struct list_head le_resolv_list; struct list_head le_conn_params; struct list_head pend_le_conns; struct list_head pend_le_reports; struct list_head blocked_keys; struct hci_dev_stats stat; atomic_t promisc; const char *hw_info; const char *fw_info; struct dentry *debugfs; struct device dev; struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 adv_data_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; __u8 scan_rsp_data_len; struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; __u16 adv_instance_timeout; struct delayed_work adv_instance_expire; struct idr adv_monitors_idr; unsigned int adv_monitors_cnt; __u8 irk[16]; __u32 rpa_timeout; struct delayed_work rpa_expired; bdaddr_t rpa; #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif #if IS_ENABLED(CONFIG_BT_MSFTEXT) __u16 msft_opcode; void *msft_data; #endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); int (*flush)(struct hci_dev *hdev); int (*setup)(struct hci_dev *hdev); int (*shutdown)(struct hci_dev *hdev); int (*send)(struct hci_dev *hdev, struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); void (*hw_error)(struct hci_dev *hdev, u8 code); int (*post_init)(struct hci_dev *hdev); int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); void (*cmd_timeout)(struct hci_dev *hdev); bool (*prevent_wake)(struct hci_dev *hdev); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { CONN_REASON_PAIR_DEVICE, CONN_REASON_L2CAP_CHAN, CONN_REASON_SCO_CONNECT, }; struct hci_conn { struct list_head list; atomic_t refcnt; 
bdaddr_t dst; __u8 dst_type; bdaddr_t src; __u8 src_type; bdaddr_t init_addr; __u8 init_addr_type; bdaddr_t resp_addr; __u8 resp_addr_type; __u16 handle; __u16 state; __u8 mode; __u8 type; __u8 role; bool out; __u8 attempt; __u8 dev_class[3]; __u8 features[HCI_MAX_PAGES][8]; __u16 pkt_type; __u16 link_policy; __u8 key_type; __u8 auth_type; __u8 sec_level; __u8 pending_sec_level; __u8 pin_length; __u8 enc_key_size; __u8 io_capability; __u32 passkey_notify; __u8 passkey_entered; __u16 disc_timeout; __u16 conn_timeout; __u16 setting; __u16 auth_payload_timeout; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; __s8 tx_power; __s8 max_tx_power; unsigned long flags; enum conn_reasons conn_reason; __u32 clock; __u16 clock_accuracy; unsigned long conn_info_timestamp; __u8 remote_cap; __u8 remote_auth; __u8 remote_id; unsigned int sent; struct sk_buff_head data_q; struct list_head chan_list; struct delayed_work disc_work; struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; struct hci_dev *hdev; void *l2cap_data; void *sco_data; struct amp_mgr *amp_mgr; struct hci_conn *link; void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason); }; struct hci_chan { struct list_head list; __u16 handle; struct hci_conn *conn; struct sk_buff_head data_q; unsigned int sent; __u8 state; bool amp; }; struct hci_conn_params { struct list_head list; struct list_head action; bdaddr_t addr; u8 addr_type; u16 conn_min_interval; u16 conn_max_interval; u16 conn_latency; u16 supervision_timeout; enum { HCI_AUTO_CONN_DISABLED, HCI_AUTO_CONN_REPORT, HCI_AUTO_CONN_DIRECT, 
HCI_AUTO_CONN_ALWAYS, HCI_AUTO_CONN_LINK_LOSS, HCI_AUTO_CONN_EXPLICIT, } auto_connect; struct hci_conn *conn; bool explicit_connect; u32 current_flags; }; extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) #define hci_dev_change_flag(hdev, nr) change_bit((nr), (hdev)->dev_flags) #define hci_dev_test_flag(hdev, nr) test_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_set_flag(hdev, nr) test_and_set_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_volatile_flags(hdev) \ do { \ hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ hci_dev_clear_flag(hdev, HCI_LE_ADV); \ hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ } while (0) /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); #if IS_ENABLED(CONFIG_BT_BREDR) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); #else static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { return 0; } static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) { } #endif /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ #define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ static inline void discovery_init(struct hci_dev *hdev) { hdev->discovery.state = DISCOVERY_STOPPED; INIT_LIST_HEAD(&hdev->discovery.all); 
INIT_LIST_HEAD(&hdev->discovery.unknown); INIT_LIST_HEAD(&hdev->discovery.resolve); hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; } static inline void hci_discovery_filter_clear(struct hci_dev *hdev) { hdev->discovery.result_filtering = false; hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; hdev->discovery.scan_start = 0; hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); void hci_discovery_set_state(struct hci_dev *hdev, int state); static inline int inquiry_cache_empty(struct hci_dev *hdev) { return list_empty(&hdev->discovery.all); } static inline long inquiry_cache_age(struct hci_dev *hdev) { struct discovery_state *c = &hdev->discovery; return jiffies - c->timestamp; } static inline long inquiry_entry_age(struct inquiry_entry *e) { return jiffies - e->timestamp; } struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_unknown(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_resolve(struct hci_dev *hdev, bdaddr_t *bdaddr, int state); void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, struct inquiry_entry *ie); u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, bool name_known); void hci_inquiry_cache_flush(struct hci_dev *hdev); /* ----- HCI Connections ----- */ enum { HCI_CONN_AUTH_PEND, HCI_CONN_REAUTH_PEND, HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, HCI_CONN_SCO_SETUP_PEND, HCI_CONN_MGMT_CONNECTED, HCI_CONN_SSP_ENABLED, HCI_CONN_SC_ENABLED, HCI_CONN_AES_CCM, HCI_CONN_POWER_SAVE, HCI_CONN_FLUSH_KEY, HCI_CONN_ENCRYPT, HCI_CONN_AUTH, HCI_CONN_SECURE, HCI_CONN_FIPS, HCI_CONN_STK_ENCRYPT, HCI_CONN_AUTH_INITIATOR, HCI_CONN_DROP, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, 
HCI_CONN_SCANNING, HCI_CONN_AUTH_FAILURE, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && test_bit(HCI_CONN_SSP_ENABLED, &conn->flags); } static inline bool hci_conn_sc_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SC_ENABLED) && test_bit(HCI_CONN_SC_ENABLED, &conn->flags); } static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_add_rcu(&c->list, &h->list); switch (c->type) { case ACL_LINK: h->acl_num++; break; case AMP_LINK: h->amp_num++; break; case LE_LINK: h->le_num++; if (c->role == HCI_ROLE_SLAVE) h->le_num_slave++; break; case SCO_LINK: case ESCO_LINK: h->sco_num++; break; } } static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_del_rcu(&c->list); synchronize_rcu(); switch (c->type) { case ACL_LINK: h->acl_num--; break; case AMP_LINK: h->amp_num--; break; case LE_LINK: h->le_num--; if (c->role == HCI_ROLE_SLAVE) h->le_num_slave--; break; case SCO_LINK: case ESCO_LINK: h->sco_num--; break; } } static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; switch (type) { case ACL_LINK: return h->acl_num; case AMP_LINK: return h->amp_num; case LE_LINK: return h->le_num; case SCO_LINK: case ESCO_LINK: return h->sco_num; default: return 0; } } static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; return c->acl_num + c->amp_num + c->sco_num + c->le_num; } static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; __u8 type = INVALID_LINK; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { type = c->type; break; } } 
rcu_read_unlock(); return type; } static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev, __u8 type, bdaddr_t *ba) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, bdaddr_t *ba, __u8 ba_type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != LE_LINK) continue; if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->state == state) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == LE_LINK && c->state == BT_CONNECT && !test_bit(HCI_CONN_SCANNING, &c->flags)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); struct hci_conn *hci_conn_add(struct 
hci_dev *hdev, int type, bdaddr_t *dst, u8 role); int hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); void hci_chan_list_flush(struct hci_conn *conn); struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role, bdaddr_t *direct_rpa); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator); int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); void hci_le_conn_failed(struct hci_conn *conn, u8 status); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an * "hci_conn" object. They do not guarantee that the hci_conn object is running, * working or anything else. They just guarantee that the object is available * and can be dereferenced. So you can use its locks, local variables and any * other constant data. * Before accessing runtime data, you _must_ lock the object and then check that * it is still running. As soon as you release the locks, the connection might * get dropped, though. * * On the other hand, hci_conn_hold() and hci_conn_drop() are used to control * how long the underlying connection is held. 
So every channel that runs on the * hci_conn object calls this to prevent the connection from disappearing. As * long as you hold a device, you must also guarantee that you have a valid * reference to the device via hci_conn_get() (or the initial reference from * hci_conn_add()). * The hold()/drop() ref-count is known to drop below 0 sometimes, which doesn't * break because nobody cares for that. But this means, we cannot use * _get()/_drop() in it, but require the caller to have a valid ref (FIXME). */ static inline struct hci_conn *hci_conn_get(struct hci_conn *conn) { get_device(&conn->dev); return conn; } static inline void hci_conn_put(struct hci_conn *conn) { put_device(&conn->dev); } static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); atomic_inc(&conn->refcnt); cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_drop(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; switch (conn->type) { case ACL_LINK: case LE_LINK: cancel_delayed_work(&conn->idle_work); if (conn->state == BT_CONNECTED) { timeo = conn->disc_timeout; if (!conn->out) timeo *= 2; } else { timeo = 0; } break; case AMP_LINK: timeo = conn->disc_timeout; break; default: timeo = 0; break; } cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } } /* ----- HCI Devices ----- */ static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; } #define hci_dev_lock(d) mutex_lock(&d->lock) #define hci_dev_unlock(d) mutex_unlock(&d->lock) #define to_hci_dev(d) container_of(d, struct hci_dev, dev) #define to_hci_conn(c) 
container_of(c, struct hci_conn, dev) static inline void *hci_get_drvdata(struct hci_dev *hdev) { return dev_get_drvdata(&hdev->dev); } static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) { dev_set_drvdata(&hdev->dev, data); } struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); __printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); __printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode) { #if IS_ENABLED(CONFIG_BT_MSFTEXT) hdev->msft_opcode = opcode; #endif } int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); int hci_dev_reset(__u16 dev); int hci_dev_reset_stat(__u16 dev); int hci_dev_cmd(unsigned int cmd, void __user *arg); int hci_get_dev_list(void __user *arg); int hci_get_dev_info(void __user *arg); int hci_get_conn_list(void __user *arg); int hci_get_conn_info(struct hci_dev *hdev, void __user *arg); int hci_get_auth_info(struct hci_dev *hdev, void __user *arg); int hci_inquiry(void __user *arg); struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, bdaddr_t *bdaddr, u8 type); struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( struct list_head *list, bdaddr_t *bdaddr, u8 type); struct bdaddr_list_with_flags * hci_bdaddr_list_lookup_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add(struct 
list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type, u8 *peer_irk, u8 *local_irk); int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type, u32 flags); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len, bool *persistent); struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type, u8 authenticated, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand); struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 role); int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_smp_ltks_clear(struct hci_dev *hdev); int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa); struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, 
u8 val[16], bdaddr_t *rpa); void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]); void hci_blocked_keys_clear(struct hci_dev *hdev); void hci_smp_irks_clear(struct hci_dev *hdev); bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); void hci_remote_oob_data_clear(struct hci_dev *hdev); struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256); int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration); int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct adv_monitor *monitor); int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle); bool hci_is_adv_monitoring(struct hci_dev *hdev); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) #define lmp_rswitch_capable(dev) 
((dev)->features[0][0] & LMP_RSWITCH) #define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD) #define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF) #define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK) #define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ) #define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO) #define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR)) #define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) #define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) #define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) #define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) #define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) #define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) #define lmp_no_flush_capable(dev) ((dev)->features[0][6] & LMP_NO_FLUSH) #define lmp_lsto_capable(dev) ((dev)->features[0][7] & LMP_LSTO) #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR) #define lmp_ext_feat_capable(dev) ((dev)->features[0][7] & LMP_EXTFEATURES) #define lmp_transp_capable(dev) ((dev)->features[0][2] & LMP_TRANSPARENT) #define lmp_edr_2m_capable(dev) ((dev)->features[0][3] & LMP_EDR_2M) #define lmp_edr_3m_capable(dev) ((dev)->features[0][3] & LMP_EDR_3M) #define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT) #define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT) /* ----- Extended LMP capabilities ----- */ #define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER) #define lmp_csb_slave_capable(dev) ((dev)->features[2][0] & LMP_CSB_SLAVE) #define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN) #define lmp_sync_scan_capable(dev) ((dev)->features[2][0] & LMP_SYNC_SCAN) #define lmp_sc_capable(dev) ((dev)->features[2][1] & LMP_SC) #define lmp_ping_capable(dev) ((dev)->features[2][1] & 
LMP_PING) /* ----- Host capabilities ----- */ #define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) #define lmp_host_sc_capable(dev) ((dev)->features[1][0] & LMP_HOST_SC) #define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) #define hdev_is_powered(dev) (test_bit(HCI_UP, &(dev)->flags) && \ !hci_dev_test_flag(dev, HCI_AUTO_OFF)) #define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ hci_dev_test_flag(dev, HCI_SC_ENABLED)) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) #define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) /* Use LL Privacy based address resolution if supported */ #define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) ((dev)->commands[37] & 0x80) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type, __u8 *flags) { switch (type) { case ACL_LINK: return l2cap_connect_ind(hdev, bdaddr); case SCO_LINK: case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); default: BT_ERR("unknown link type %d", type); return -EINVAL; } } static inline int hci_proto_disconn_ind(struct hci_conn *conn) { if (conn->type != ACL_LINK && conn->type != LE_LINK) return HCI_ERROR_REMOTE_USER_TERM; return l2cap_disconn_ind(conn); } /* 
----- HCI callbacks ----- */ struct hci_cb { struct list_head list; char *name; void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); } static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); } mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) return; encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 
0x01 : 0x00; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { if (!status) conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); hci_conn_drop(conn); return; } if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) encrypt = 0x00; else if (test_bit(HCI_CONN_AES_CCM, &conn->flags)) encrypt = 0x02; else encrypt = 0x01; if (!status) { if (conn->sec_level == BT_SECURITY_SDP) conn->sec_level = BT_SECURITY_LOW; if (conn->pending_sec_level > conn->sec_level) conn->sec_level = conn->pending_sec_level; } mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); } mutex_unlock(&hci_cb_list_lock); } static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, size_t *data_len) { size_t parsed = 0; if (eir_len < 2) return NULL; while (parsed < eir_len - 1) { u8 field_len = eir[0]; if (field_len == 0) break; parsed += field_len + 1; if (parsed > eir_len) break; if (eir[1] != type) { eir += field_len + 1; continue; } /* Zero length data */ if (field_len == 1) return 
NULL; if (data_len) *data_len = field_len - 1; return &eir[2]; } return NULL; } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) { if (addr_type != ADDR_LE_DEV_RANDOM) return false; if ((bdaddr->b[5] & 0xc0) == 0x40) return true; return false; } static inline bool hci_is_identity_address(bdaddr_t *addr, u8 addr_type) { if (addr_type == ADDR_LE_DEV_PUBLIC) return true; /* Check for Random Static address type */ if ((addr->b[5] & 0xc0) == 0xc0) return true; return false; } static inline struct smp_irk *hci_get_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { if (!hci_bdaddr_is_rpa(bdaddr, addr_type)) return NULL; return hci_find_irk_by_rpa(hdev, bdaddr); } static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, u16 to_multiplier) { u16 max_latency; if (min > max || min < 6 || max > 3200) return -EINVAL; if (to_multiplier < 10 || to_multiplier > 3200) return -EINVAL; if (max >= to_multiplier * 8) return -EINVAL; max_latency = (to_multiplier * 4 / max) - 1; if (latency > 499 || latency > max_latency) return -EINVAL; return 0; } int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); u32 hci_conn_get_phy(struct hci_conn *conn); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct 
sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); #define HCI_MGMT_VAR_LEN BIT(0) #define HCI_MGMT_NO_HDEV BIT(1) #define HCI_MGMT_UNTRUSTED BIT(2) #define HCI_MGMT_UNCONFIGURED BIT(3) #define HCI_MGMT_HDEV_OPTIONAL BIT(4) struct hci_mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); size_t data_len; unsigned long flags; }; struct hci_mgmt_chan { struct list_head list; unsigned short channel; size_t handler_count; const struct hci_mgmt_handler *handlers; void (*hdev_init) (struct sock *sk, struct hci_dev *hdev); }; int hci_mgmt_chan_register(struct hci_mgmt_chan *c); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); /* Management interface */ #define DISCOV_TYPE_BREDR (BIT(BDADDR_BREDR)) #define DISCOV_TYPE_LE (BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) #define DISCOV_TYPE_INTERLEAVED (BIT(BDADDR_BREDR) | \ BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) /* These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. 
*/ #define DISCOV_LE_SCAN_WIN 0x12 #define DISCOV_LE_SCAN_INT 0x12 #define DISCOV_LE_TIMEOUT 10240 /* msec */ #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ #define DISCOV_LE_FAST_ADV_INT_MIN 100 /* msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 150 /* msec */ void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); void mgmt_power_on(struct hci_dev *hdev, int err); void __mgmt_power_off(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u32 flags, u8 *name, u8 name_len); void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 reason, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 value, u8 confirm_hint); int mgmt_user_confirm_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_confirm_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type); 
int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 passkey, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); void mgmt_suspending(struct hci_dev *hdev, u8 state); void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr, u8 addr_type); bool mgmt_powering_down(struct hci_dev *hdev); void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent); void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent); void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bool persistent); void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); void 
mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); #define SCO_AIRMODE_MASK 0x0003 #define SCO_AIRMODE_CVSD 0x0000 #define SCO_AIRMODE_TRANSP 0x0003 #endif /* __HCI_CORE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_H #define _LINUX_HIGHMEM_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/cacheflush.h> #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE static inline void flush_kernel_dcache_page(struct page *page) { } static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif #include <asm/kmap_types.h> #ifdef 
CONFIG_HIGHMEM extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); extern void kunmap_atomic_high(void *kvaddr); #include <asm/highmem.h> #ifndef ARCH_HAS_KMAP_FLUSH_TLB static inline void kmap_flush_tlb(unsigned long addr) { } #endif #ifndef kmap_prot #define kmap_prot PAGE_KERNEL #endif void *kmap_high(struct page *page); static inline void *kmap(struct page *page) { void *addr; might_sleep(); if (!PageHighMem(page)) addr = page_address(page); else addr = kmap_high(page); kmap_flush_tlb((unsigned long)addr); return addr; } void kunmap_high(struct page *page); static inline void kunmap(struct page *page) { might_sleep(); if (!PageHighMem(page)) return; kunmap_high(page); } /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. * * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap * gives a more generic (and caching) interface. But kmap_atomic can * be used in IRQ contexts, so in some (very limited) cases we need * it. 
*/ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); return kmap_atomic_high_prot(page, prot); } #define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); extern atomic_long_t _totalhigh_pages; static inline unsigned long totalhigh_pages(void) { return (unsigned long)atomic_long_read(&_totalhigh_pages); } static inline void totalhigh_pages_inc(void) { atomic_long_inc(&_totalhigh_pages); } static inline void totalhigh_pages_dec(void) { atomic_long_dec(&_totalhigh_pages); } static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); } static inline void totalhigh_pages_set(long val) { atomic_long_set(&_totalhigh_pages, val); } void kmap_flush_unused(void); struct page *kmap_to_page(void *addr); #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } static inline struct page *kmap_to_page(void *addr) { return virt_to_page(addr); } static inline unsigned long totalhigh_pages(void) { return 0UL; } static inline void *kmap(struct page *page) { might_sleep(); return page_address(page); } static inline void kunmap_high(struct page *page) { } static inline void kunmap(struct page *page) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(page_address(page)); #endif } static inline void *kmap_atomic(struct page *page) { preempt_disable(); pagefault_disable(); return page_address(page); } #define kmap_atomic_prot(page, prot) kmap_atomic(page) static inline void kunmap_atomic_high(void *addr) { /* * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() * handles re-enabling faults + preemption */ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); #endif } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) #define kmap_flush_unused() do {} while(0) #endif /* CONFIG_HIGHMEM */ 
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DECLARE_PER_CPU(int, __kmap_atomic_idx); static inline int kmap_atomic_idx_push(void) { int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; #ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx >= KM_TYPE_NR); #endif return idx; } static inline int kmap_atomic_idx(void) { return __this_cpu_read(__kmap_atomic_idx) - 1; } static inline void kmap_atomic_idx_pop(void) { #ifdef CONFIG_DEBUG_HIGHMEM int idx = __this_cpu_dec_return(__kmap_atomic_idx); BUG_ON(idx < 0); #else __this_cpu_dec(__kmap_atomic_idx); #endif } #endif /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() * kunmap_atomic() should get the return value of kmap_atomic, not the page. */ #define kunmap_atomic(addr) \ do { \ BUILD_BUG_ON(__same_type((addr), struct page *)); \ kunmap_atomic_high(addr); \ pagefault_enable(); \ preempt_enable(); \ } while (0) /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page); clear_user_page(addr, vaddr, page); kunmap_atomic(addr); } #endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into * * This function will allocate a page for a VMA but the caller is expected * to specify via movableflags whether the page will be movable in the * future or not * * An architecture may override this function by defining * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own * implementation. 
*/ static inline struct page * __alloc_zeroed_user_highpage(gfp_t movableflags, struct vm_area_struct *vma, unsigned long vaddr) { struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, vma, vaddr); if (page) clear_user_highpage(page, vaddr); return page; } #endif /** * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into * * This function will allocate a page for a VMA that the caller knows will * be able to migrate in the future using move_pages() or reclaimed */ static inline struct page * alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, unsigned long vaddr) { return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); } static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page); clear_page(kaddr); kunmap_atomic(kaddr); } static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_atomic(page); BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_atomic(kaddr); flush_dcache_page(page); } static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page 
*to, struct page *from) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_page(vto, vfrom); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif #endif /* _LINUX_HIGHMEM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 /* SPDX-License-Identifier: GPL-2.0 */ /* * Routines to manage notifier chains for passing status changes to any * interested routines. We need this instead of hard coded call lists so * that modules can poke their nose into the innards. The network devices * needed them so here they are for the rest of you. * * Alan Cox <Alan.Cox@linux.org> */ #ifndef _LINUX_NOTIFIER_H #define _LINUX_NOTIFIER_H #include <linux/errno.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/srcu.h> /* * Notifier chains are of four types: * * Atomic notifier chains: Chain callbacks run in interrupt/atomic * context. Callouts are not allowed to block. * Blocking notifier chains: Chain callbacks run in process context. * Callouts are allowed to block. * Raw notifier chains: There are no restrictions on callbacks, * registration, or unregistration. All locking and protection * must be provided by the caller. * SRCU notifier chains: A variant of blocking notifier chains, with * the same restrictions. 
* * atomic_notifier_chain_register() may be called from an atomic context, * but blocking_notifier_chain_register() and srcu_notifier_chain_register() * must be called from a process context. Ditto for the corresponding * _unregister() routines. * * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(), * and srcu_notifier_chain_unregister() _must not_ be called from within * the call chain. * * SRCU notifier chains are an alternative form of blocking notifier chains. * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for * protection of the chain links. This means there is _very_ low overhead * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. * SRCU notifier chains should be used when the chain will be called very * often but notifier_blocks will seldom be removed. */ struct notifier_block; typedef int (*notifier_fn_t)(struct notifier_block *nb, unsigned long action, void *data); struct notifier_block { notifier_fn_t notifier_call; struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; struct notifier_block __rcu *head; }; struct raw_notifier_head { struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ spin_lock_init(&(name)->lock); \ (name)->head = NULL; \ } while (0) #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ init_rwsem(&(name)->rwsem); \ (name)->head = NULL; \ } while (0) #define RAW_INIT_NOTIFIER_HEAD(name) do { \ (name)->head = NULL; \ } while (0) /* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ 
cleanup_srcu_struct(&(name)->srcu); #define ATOMIC_NOTIFIER_INIT(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ ATOMIC_NOTIFIER_INIT(name) #define BLOCKING_NOTIFIER_HEAD(name) \ struct blocking_notifier_head name = \ BLOCKING_NOTIFIER_INIT(name) #define RAW_NOTIFIER_HEAD(name) \ struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) #ifdef CONFIG_TREE_SRCU #define _SRCU_NOTIFIER_HEAD(name, mod) \ static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) #else #define _SRCU_NOTIFIER_HEAD(name, mod) \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name) #endif #define SRCU_NOTIFIER_HEAD(name) \ _SRCU_NOTIFIER_HEAD(name, /* not static */) #define SRCU_NOTIFIER_HEAD_STATIC(name) \ _SRCU_NOTIFIER_HEAD(name, static) #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *nb); extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, struct notifier_block *nb); extern int 
srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ #define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ /* * Clean way to return from the notifier and stop further calls. */ #define NOTIFY_STOP (NOTIFY_OK|NOTIFY_STOP_MASK) /* Encapsulate (negative) errno value (in particular, NOTIFY_BAD <=> EPERM). */ static inline int notifier_from_errno(int err) { if (err) return NOTIFY_STOP_MASK | (NOTIFY_OK - err); return NOTIFY_OK; } /* Restore (negative) errno value from notify return value. */ static inline int notifier_to_errno(int ret) { ret &= ~NOTIFY_STOP_MASK; return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0; } /* * Declared notifiers so far. I can imagine quite a few more chains * over time (eg laptop power reset chains, reboot chain (to clean * device units up), device [un]mount chain, module load/unload chain, * low memory chain, screenblank chain (for plug in modular screenblankers) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... 
*/ /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ /* reboot notifiers are defined in include/linux/reboot.h. */ /* Hibernation and suspend events are defined in include/linux/suspend.h. */ /* Virtual Terminal events are defined in include/linux/vt.h. */ #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ /* Console keyboard events. * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and * KBD_KEYSYM. */ #define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */ #define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */ #define KBD_UNICODE 0x0003 /* Keyboard unicode */ #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ extern struct blocking_notifier_head reboot_notifier_list; #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/pm_qos.h> static inline void device_pm_init_common(struct device *dev) { if (!dev->power.early_init) { spin_lock_init(&dev->power.lock); dev->power.qos = NULL; dev->power.early_init = true; } } #ifdef CONFIG_PM static inline void pm_runtime_early_init(struct device *dev) { dev->power.disable_depth = 1; device_pm_init_common(dev); } extern void pm_runtime_init(struct device *dev); extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); extern u64 pm_runtime_active_time(struct device *dev); #define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) #define WAKE_IRQ_DEDICATED_MANAGED BIT(1) #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ WAKE_IRQ_DEDICATED_MANAGED) struct wake_irq { struct device *dev; unsigned int status; int irq; const char *name; }; extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); extern void dev_pm_enable_wake_irq_check(struct device *dev, bool can_change_status); extern void dev_pm_disable_wake_irq_check(struct device *dev); #ifdef CONFIG_PM_SLEEP extern void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq); extern void device_wakeup_detach_irq(struct device *dev); extern void device_wakeup_arm_wake_irqs(void); extern void device_wakeup_disarm_wake_irqs(void); #else static inline void 
device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) {} static inline void device_wakeup_detach_irq(struct device *dev) { } #endif /* CONFIG_PM_SLEEP */ /* * sysfs.c */ extern int dpm_sysfs_add(struct device *dev); extern void dpm_sysfs_remove(struct device *dev); extern void rpm_sysfs_remove(struct device *dev); extern int wakeup_sysfs_add(struct device *dev); extern void wakeup_sysfs_remove(struct device *dev); extern int pm_qos_sysfs_add_resume_latency(struct device *dev); extern void pm_qos_sysfs_remove_resume_latency(struct device *dev); extern int pm_qos_sysfs_add_flags(struct device *dev); extern void pm_qos_sysfs_remove_flags(struct device *dev); extern int pm_qos_sysfs_add_latency_tolerance(struct device *dev); extern void pm_qos_sysfs_remove_latency_tolerance(struct device *dev); extern int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); #else /* CONFIG_PM */ static inline void pm_runtime_early_init(struct device *dev) { device_pm_init_common(dev); } static inline void pm_runtime_init(struct device *dev) {} static inline void pm_runtime_reinit(struct device *dev) {} static inline void pm_runtime_remove(struct device *dev) {} static inline int dpm_sysfs_add(struct device *dev) { return 0; } static inline void dpm_sysfs_remove(struct device *dev) {} static inline int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ extern int pm_async_enabled; /* drivers/base/power/main.c */ extern struct list_head dpm_list; /* The active device list */ static inline struct device *to_device(struct list_head *entry) { return container_of(entry, struct device, power.entry); } extern void device_pm_sleep_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct 
device *); extern void device_pm_move_last(struct device *); extern void device_pm_check_callbacks(struct device *dev); static inline bool device_pm_initialized(struct device *dev) { return dev->power.in_dpm_list; } /* drivers/base/power/wakeup_stats.c */ extern int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws); extern void wakeup_source_sysfs_remove(struct wakeup_source *ws); extern int pm_wakeup_source_sysfs_add(struct device *parent); #else /* !CONFIG_PM_SLEEP */ static inline void device_pm_sleep_init(struct device *dev) {} static inline void device_pm_add(struct device *dev) {} static inline void device_pm_remove(struct device *dev) { pm_runtime_remove(dev); } static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} static inline void device_pm_check_callbacks(struct device *dev) {} static inline bool device_pm_initialized(struct device *dev) { return device_is_registered(dev); } static inline int pm_wakeup_source_sysfs_add(struct device *parent) { return 0; } #endif /* !CONFIG_PM_SLEEP */ static inline void device_pm_init(struct device *dev) { device_pm_init_common(dev); device_pm_sleep_init(dev); pm_runtime_init(dev); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BH_H #define _LINUX_BH_H #include <linux/preempt.h> #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { preempt_count_add(cnt); barrier(); } #endif static inline void local_bh_disable(void) { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } extern void _local_bh_enable(void); extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt); static inline void local_bh_enable_ip(unsigned long ip) { __local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET); } static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } #endif /* _LINUX_BH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jens Axboe <axboe@suse.de> */ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H #include <linux/highmem.h> #include <linux/mempool.h> #include <linux/ioprio.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #define BIO_DEBUG #ifdef BIO_DEBUG #define BIO_BUG_ON BUG_ON #else #define BIO_BUG_ON #endif #define BIO_MAX_PAGES 256 #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) #define bio_iter_page(bio, iter) \ bvec_iter_page((bio)->bi_io_vec, (iter)) #define bio_iter_len(bio, iter) \ bvec_iter_len((bio)->bi_io_vec, (iter)) #define bio_iter_offset(bio, iter) \ bvec_iter_offset((bio)->bi_io_vec, (iter)) #define bio_page(bio) 
bio_iter_page((bio), (bio)->bi_iter) #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bvec_iter_sectors(iter) ((iter).bi_size >> 9) #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) #define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) #define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) /* * Return the data direction, READ or WRITE. */ #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && bio_op(bio) != REQ_OP_SECURE_ERASE && bio_op(bio) != REQ_OP_WRITE_ZEROES) return true; return false; } static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || bio_op(bio) == REQ_OP_WRITE_SAME || bio_op(bio) == REQ_OP_WRITE_ZEROES; } static inline bool bio_mergeable(struct bio *bio) { if (bio->bi_opf & REQ_NOMERGE_FLAGS) return false; return true; } static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio_has_data(bio)) return bio_iovec(bio).bv_len; else /* dataless requests such as discard */ return bio->bi_iter.bi_size; } static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) return page_address(bio_page(bio)) + bio_offset(bio); return NULL; } /** * bio_full - check if the bio is full * @bio: bio to check * @len: length of one segment to be added * * Return true if @bio is full and one segment with @len bytes can't be * added to the bio, otherwise return false */ static inline bool bio_full(struct bio *bio, unsigned len) { if (bio->bi_vcnt >= bio->bi_max_vecs) return true; if (bio->bi_iter.bi_size > UINT_MAX - len) return true; return false; } static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) 
{ if (iter->idx >= bio->bi_vcnt) return false; bvec_advance(&bio->bi_io_vec[iter->idx], iter); return true; } /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ #define bio_for_each_segment_all(bvl, bio, iter) \ for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) static inline void bio_advance_iter(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. */ } #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ bio_advance_iter((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) #define __bio_for_each_bvec(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ bio_advance_iter((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ __bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter) /* * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the * same reasons as bio_for_each_segment_all(). 
*/ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ i < (bio)->bi_vcnt; i++, bvl++) \ #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; struct bvec_iter iter; /* * We special case discard/write same/write zeroes, because they * interpret bi_size differently: */ switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; case REQ_OP_WRITE_SAME: return 1; default: break; } bio_for_each_segment(bv, bio, iter) segs++; return segs; } /* * get a reference to a bio, so it won't disappear. the intended use is * something like: * * bio_get(bio); * submit_bio(rw, bio); * if (bio->bi_flags ...) * do_something * bio_put(bio); * * without the bio_get(), it could potentially complete I/O before submit_bio * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ static inline void bio_get(struct bio *bio) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); atomic_inc(&bio->__bi_cnt); } static inline void bio_cnt_set(struct bio *bio, unsigned int count) { if (count != 1) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb(); } atomic_set(&bio->__bi_cnt, count); } static inline bool bio_flagged(struct bio *bio, unsigned int bit) { return (bio->bi_flags & (1U << bit)) != 0; } static inline void bio_set_flag(struct bio *bio, unsigned int bit) { bio->bi_flags |= (1U << bit); } static inline void bio_clear_flag(struct bio *bio, unsigned int bit) { bio->bi_flags &= ~(1U << bit); } static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) { *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); } static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) { struct bvec_iter iter = bio->bi_iter; int idx; bio_get_first_bvec(bio, bv); if (bv->bv_len == bio->bi_iter.bi_size) return; /* this bio only has a single bvec */ 
bio_advance_iter(bio, &iter, iter.bi_size); if (!iter.bi_bvec_done) idx = iter.bi_idx - 1; else /* in the middle of bvec */ idx = iter.bi_idx; *bv = bio->bi_io_vec[idx]; /* * iter.bi_bvec_done records actual length of the last bvec * if this bio ends in the middle of one io vector */ if (iter.bi_bvec_done) bv->bv_len = iter.bi_bvec_done; } static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return bio->bi_io_vec; } static inline struct page *bio_first_page_all(struct bio *bio) { return bio_first_bvec_all(bio)->bv_page; } static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return &bio->bi_io_vec[bio->bi_vcnt - 1]; } enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ }; /* * bio integrity payload */ struct bio_integrity_payload { struct bio *bip_bio; /* parent bio */ struct bvec_iter bip_iter; unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ struct bvec_iter bio_iter; /* for rewinding parent bio */ struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; #if defined(CONFIG_BLK_DEV_INTEGRITY) static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) { if (bio->bi_opf & REQ_INTEGRITY) return bio->bi_integrity; return NULL; } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { struct bio_integrity_payload *bip = bio_integrity(bio); if (bip) return bip->bip_flags & flag; return false; } static inline 
sector_t bip_get_seed(struct bio_integrity_payload *bip) { return bip->bip_iter.bi_sector; } static inline void bip_set_seed(struct bio_integrity_payload *bip, sector_t seed) { bip->bip_iter.bi_sector = seed; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ extern void bio_trim(struct bio *bio, int offset, int size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); /** * bio_next_split - get next @sectors from a bio, splitting if necessary * @bio: bio to split * @sectors: number of sectors to split from the front of @bio * @gfp: gfp mask * @bs: bio set to allocate from * * Returns a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. */ static inline struct bio *bio_next_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { if (sectors >= bio_sectors(bio)) return bio; return bio_split(bio, sectors, gfp, bs); } enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), }; extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *); extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); } extern blk_qc_t submit_bio(struct bio *); extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } 
static inline void bio_wouldblock_error(struct bio *bio) { bio_set_flag(bio, BIO_QUIET); bio->bi_status = BLK_STS_AGAIN; bio_endio(bio); } struct request_queue; extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); bool __bio_try_merge_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off, bool *same_page); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); static inline void zero_fill_bio(struct bio *bio) { zero_fill_bio_iter(bio, bio->bi_iter); } extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_set_dev(bio, bdev) \ do { \ if ((bio)->bi_disk != (bdev)->bd_disk) \ bio_clear_flag(bio, 
BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ disk_devt((bio)->bi_disk) #ifdef CONFIG_BLK_CGROUP void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM /* * remember never ever reenable interrupts between a bvec_kmap_irq and * bvec_kunmap_irq! */ static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { unsigned long addr; /* * might not be a highmem page, but the preempt/irq count * balancing is a lot nicer this way */ local_irq_save(*flags); addr = (unsigned long) kmap_atomic(bvec->bv_page); BUG_ON(addr & ~PAGE_MASK); return (char *) addr + bvec->bv_offset; } static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; kunmap_atomic((void *) ptr); local_irq_restore(*flags); } #else static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { return page_address(bvec->bv_page) + bvec->bv_offset; } static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { *flags = 0; } #endif /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. 
The bio_list also caches the last list member to allow * fast access to the tail. */ struct bio_list { struct bio *head; struct bio *tail; }; static inline int bio_list_empty(const struct bio_list *bl) { return bl->head == NULL; } static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL; } #define BIO_EMPTY_LIST { NULL, NULL } #define bio_list_for_each(bio, bl) \ for (bio = (bl)->head; bio; bio = bio->bi_next) static inline unsigned bio_list_size(const struct bio_list *bl) { unsigned sz = 0; struct bio *bio; bio_list_for_each(bio, bl) sz++; return sz; } static inline void bio_list_add(struct bio_list *bl, struct bio *bio) { bio->bi_next = NULL; if (bl->tail) bl->tail->bi_next = bio; else bl->head = bio; bl->tail = bio; } static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) { bio->bi_next = bl->head; bl->head = bio; if (!bl->tail) bl->tail = bio; } static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->tail) bl->tail->bi_next = bl2->head; else bl->head = bl2->head; bl->tail = bl2->tail; } static inline void bio_list_merge_head(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->head) bl2->tail->bi_next = bl->head; else bl->tail = bl2->tail; bl->head = bl2->head; } static inline struct bio *bio_list_peek(struct bio_list *bl) { return bl->head; } static inline struct bio *bio_list_pop(struct bio_list *bl) { struct bio *bio = bl->head; if (bio) { bl->head = bl->head->bi_next; if (!bl->head) bl->tail = NULL; bio->bi_next = NULL; } return bio; } static inline struct bio *bio_list_get(struct bio_list *bl) { struct bio *bio = bl->head; bl->head = bl->tail = NULL; return bio; } /* * Increment chain count for the bio. Make sure the CHAIN flag update * is visible before the raised count. 
*/ static inline void bio_inc_remaining(struct bio *bio) { bio_set_flag(bio, BIO_CHAIN); smp_mb__before_atomic(); atomic_inc(&bio->__bi_remaining); } /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab * and the bvec_slabs[]. */ #define BIO_POOL_SIZE 2 struct bio_set { struct kmem_cache *bio_slab; unsigned int front_pad; mempool_t bio_pool; mempool_t bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t bio_integrity_pool; mempool_t bvec_integrity_pool; #endif /* * Deadlock avoidance for stacking block drivers: see comments in * bio_alloc_bioset() for details */ spinlock_t rescue_lock; struct bio_list rescue_list; struct work_struct rescue_work; struct workqueue_struct *rescue_workqueue; }; struct biovec_slab { int nr_vecs; char *name; struct kmem_cache *slab; }; static inline bool bioset_initialized(struct bio_set *bs) { return bs->bio_slab != NULL; } /* * a small number of entries is fine, not going to be performance critical. 
* basically we just need to survive */ #define BIO_SPLIT_ENTRIES 2 #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) #define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void *bio_integrity(struct bio *bio) { return NULL; } static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) { return 0; } static inline void bioset_integrity_free (struct bio_set *bs) { return; } static inline bool bio_integrity_prep(struct bio *bio) { return true; } static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { return 0; } static inline void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { return; } static inline void bio_integrity_trim(struct bio *bio) { return; } static inline void bio_integrity_init(void) { return; } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { return false; } static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp, unsigned int nr) { return ERR_PTR(-EINVAL); } static inline int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { return 0; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * Mark a bio as polled. 
Note that for async polled IO, the caller must * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). * We cannot block waiting for requests on polled IO, as those completions * must be found by the caller. This is different than IRQ driven IO, where * it's safe to wait for IO to complete. */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { bio->bi_opf |= REQ_HIPRI; if (!is_sync_kiocb(kiocb)) bio->bi_opf |= REQ_NOWAIT; } #endif /* __LINUX_BIO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2006, Johannes Berg <johannes@sipsolutions.net> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/leds.h> #include "ieee80211_i.h" #define MAC80211_BLINK_DELAY 50 /* ms */ static inline void ieee80211_led_rx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS unsigned long led_delay = MAC80211_BLINK_DELAY; if (!atomic_read(&local->rx_led_active)) return; led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0); #endif } static inline void ieee80211_led_tx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS unsigned long led_delay = MAC80211_BLINK_DELAY; if (!atomic_read(&local->tx_led_active)) return; led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0); #endif } #ifdef CONFIG_MAC80211_LEDS void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); void ieee80211_alloc_led_names(struct ieee80211_local *local); void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { } static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } static inline void ieee80211_alloc_led_names(struct ieee80211_local *local) { } static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) { } 
static inline void ieee80211_led_exit(struct ieee80211_local *local) { } static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { } #endif static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NETFILTER_INGRESS_H_ #define _NETFILTER_INGRESS_H_ #include <linux/netfilter.h> #include <linux/netdevice.h> #ifdef CONFIG_NETFILTER_INGRESS static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { #ifdef CONFIG_JUMP_LABEL if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. */ if (unlikely(!e)) return 0; nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); ret = nf_hook_slow(skb, &state, e, 0); if (ret == 0) return -1; return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) { RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) { return 0; } static inline int nf_hook_ingress(struct sk_buff *skb) { return 0; } static inline void nf_hook_ingress_init(struct net_device *dev) {} #endif /* CONFIG_NETFILTER_INGRESS */ #endif /* _NETFILTER_INGRESS_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * workqueue.h --- work queue handling for Linux.
 */

#ifndef _LINUX_WORKQUEUE_H
#define _LINUX_WORKQUEUE_H

#include <linux/timer.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/threads.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/rcupdate.h>

struct workqueue_struct;

struct work_struct;
/* Signature of the callback stored in work_struct::func. */
typedef void (*work_func_t)(struct work_struct *work);
/* Timer callback installed by the delayed-work initializers below. */
void delayed_work_timer_fn(struct timer_list *t);

/*
 * The first word is the work queue pointer and the flags rolled into
 * one
 */
#define work_data_bits(work) ((unsigned long *)(&(work)->data))

/*
 * Layout of work_struct::data: bit numbers (*_BIT), the matching masks,
 * and the constants derived from them.
 */
enum {
	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
	WORK_STRUCT_STATIC_BIT	= 4,	/* static initializer (debugobjects) */
	WORK_STRUCT_COLOR_SHIFT	= 5,	/* color for workqueue flushing */
#else
	WORK_STRUCT_COLOR_SHIFT	= 4,	/* color for workqueue flushing */
#endif

	WORK_STRUCT_COLOR_BITS	= 4,

	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
#else
	WORK_STRUCT_STATIC	= 0,
#endif

	/*
	 * The last color is no color used for works which don't
	 * participate in workqueue flushing.
	 */
	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
	WORK_NO_COLOR		= WORK_NR_COLORS,

	/* not bound to any CPU, prefer the local CPU */
	WORK_CPU_UNBOUND	= NR_CPUS,

	/*
	 * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
	 * flush colors.
	 */
	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
				  WORK_STRUCT_COLOR_BITS,

	/* data contains off-queue information when !WORK_STRUCT_PWQ */
	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_COLOR_SHIFT,

	__WORK_OFFQ_CANCELING	= WORK_OFFQ_FLAG_BASE,
	WORK_OFFQ_CANCELING	= (1 << __WORK_OFFQ_CANCELING),

	/*
	 * When a work item is off queue, its high bits point to the last
	 * pool it was on.  Cap at 31 bits and use the highest number to
	 * indicate that no pool is associated.
	 */
	WORK_OFFQ_FLAG_BITS	= 1,
	WORK_OFFQ_POOL_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
	WORK_OFFQ_POOL_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
	WORK_OFFQ_POOL_NONE	= (1LU << WORK_OFFQ_POOL_BITS) - 1,

	/* convenience constants */
	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
	WORK_STRUCT_NO_POOL	= (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,

	/* bit mask for work_busy() return values */
	WORK_BUSY_PENDING	= 1 << 0,
	WORK_BUSY_RUNNING	= 1 << 1,

	/* maximum string length for set_worker_desc() */
	WORKER_DESC_LEN		= 24,
};

/*
 * A single work item: the combined pointer/flags word (see
 * work_data_bits()), a list link, and the callback to run.
 */
struct work_struct {
	atomic_long_t data;
	struct list_head entry;
	work_func_t func;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;
#endif
};

/*
 * Initial values of work_struct::data: "no pool", and for static
 * initializers additionally WORK_STRUCT_STATIC (which is 0 unless
 * CONFIG_DEBUG_OBJECTS_WORK is set).
 */
#define WORK_DATA_INIT()	ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT()	\
	ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC))

/* A work item paired with the timer that queues it. */
struct delayed_work {
	struct work_struct work;
	struct timer_list timer;

	/* target workqueue and CPU ->timer uses to queue ->work */
	struct workqueue_struct *wq;
	int cpu;
};

/* A work item paired with an rcu_head that queues it. */
struct rcu_work {
	struct work_struct work;
	struct rcu_head rcu;

	/* target workqueue ->rcu uses to queue ->work */
	struct workqueue_struct *wq;
};

/**
 * struct workqueue_attrs - A struct for workqueue attributes.
 *
 * This can be used to change attributes of an unbound workqueue.
 */
struct workqueue_attrs {
	/**
	 * @nice: nice level
	 */
	int nice;

	/**
	 * @cpumask: allowed CPUs
	 */
	cpumask_var_t cpumask;

	/**
	 * @no_numa: disable NUMA affinity
	 *
	 * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It
	 * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus
	 * doesn't participate in pool hash calculations or equality comparisons.
	 */
	bool no_numa;
};

/*
 * Map a work_struct back to the delayed_work that embeds it.  @work must
 * be the ->work member of a struct delayed_work.
 */
static inline struct delayed_work *to_delayed_work(struct work_struct *work)
{
	return container_of(work, struct delayed_work, work);
}

/*
 * Map a work_struct back to the rcu_work that embeds it.  @work must be
 * the ->work member of a struct rcu_work.
 */
static inline struct rcu_work *to_rcu_work(struct work_struct *work)
{
	return container_of(work, struct rcu_work, work);
}

/* Storage handed to execute_in_process_context(). */
struct execute_work {
	struct work_struct work;
};

#ifdef CONFIG_LOCKDEP
/*
 * NB: because we have to copy the lockdep_map, setting _key
 * here is required, otherwise it could get initialised to the
 * copy of the lockdep_map!
 */
#define __WORK_INIT_LOCKDEP_MAP(n, k) \
	.lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
#else
#define __WORK_INIT_LOCKDEP_MAP(n, k)
#endif

/*
 * Static initializer for a work_struct named @n running @f: the list
 * entry points at itself and data carries the static-init value.
 */
#define __WORK_INITIALIZER(n, f) {					\
	.data = WORK_DATA_STATIC_INIT(),				\
	.entry	= { &(n).entry, &(n).entry },				\
	.func = (f),							\
	__WORK_INIT_LOCKDEP_MAP(#n, &(n))				\
	}

/*
 * Static initializer for a delayed_work; TIMER_IRQSAFE is always OR-ed
 * into the caller's timer flags.
 */
#define __DELAYED_WORK_INITIALIZER(n, f, tflags) {			\
	.work = __WORK_INITIALIZER((n).work, (f)),			\
	.timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\
				     (tflags) | TIMER_IRQSAFE),		\
	}

/* Define and statically initialize a work item / delayed work item. */
#define DECLARE_WORK(n, f)						\
	struct work_struct n = __WORK_INITIALIZER(n, f)

#define DECLARE_DELAYED_WORK(n, f)					\
	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)

#define DECLARE_DEFERRABLE_WORK(n, f)					\
	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE)

/*
 * debugobjects hooks: real functions with CONFIG_DEBUG_OBJECTS_WORK,
 * empty stubs (work_static() returning 0) without it.
 */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
extern void __init_work(struct work_struct *work, int onstack);
extern void destroy_work_on_stack(struct work_struct *work);
extern void destroy_delayed_work_on_stack(struct delayed_work *work);
static inline unsigned int work_static(struct work_struct *work)
{
	return *work_data_bits(work) & WORK_STRUCT_STATIC;
}
#else
static inline void __init_work(struct work_struct *work, int onstack) { }
static inline void destroy_work_on_stack(struct work_struct *work) { }
static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { }
static inline unsigned int work_static(struct work_struct *work) { return 0; }
#endif

/*
 * initialize all of a work item in one go
 *
 * NOTE! No point in using "atomic_long_set()": using a direct
 * assignment of the work data initializer allows the compiler
 * to generate better code.
 */
#ifdef CONFIG_LOCKDEP
#define __INIT_WORK(_work, _func, _onstack)				\
	do {								\
		static struct lock_class_key __key;			\
									\
		__init_work((_work), _onstack);				\
		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
		lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \
		INIT_LIST_HEAD(&(_work)->entry);			\
		(_work)->func = (_func);				\
	} while (0)
#else
#define __INIT_WORK(_work, _func, _onstack)				\
	do {								\
		__init_work((_work), _onstack);				\
		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
		INIT_LIST_HEAD(&(_work)->entry);			\
		(_work)->func = (_func);				\
	} while (0)
#endif

/*
 * Runtime initializers.  The *_ONSTACK variants pass onstack=1 to
 * __init_work() and use __init_timer_on_stack() for the timer.
 */
#define INIT_WORK(_work, _func)						\
	__INIT_WORK((_work), (_func), 0)

#define INIT_WORK_ONSTACK(_work, _func)					\
	__INIT_WORK((_work), (_func), 1)

#define __INIT_DELAYED_WORK(_work, _func, _tflags)			\
	do {								\
		INIT_WORK(&(_work)->work, (_func));			\
		__init_timer(&(_work)->timer,				\
			     delayed_work_timer_fn,			\
			     (_tflags) | TIMER_IRQSAFE);		\
	} while (0)

#define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags)		\
	do {								\
		INIT_WORK_ONSTACK(&(_work)->work, (_func));		\
		__init_timer_on_stack(&(_work)->timer,			\
				      delayed_work_timer_fn,		\
				      (_tflags) | TIMER_IRQSAFE);	\
	} while (0)

#define INIT_DELAYED_WORK(_work, _func)					\
	__INIT_DELAYED_WORK(_work, _func, 0)

#define INIT_DELAYED_WORK_ONSTACK(_work, _func)				\
	__INIT_DELAYED_WORK_ONSTACK(_work, _func, 0)

#define INIT_DEFERRABLE_WORK(_work, _func)				\
	__INIT_DELAYED_WORK(_work, _func, TIMER_DEFERRABLE)

#define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func)			\
	__INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE)

/* Only the embedded work item is initialized; ->rcu and ->wq are not. */
#define INIT_RCU_WORK(_work, _func)					\
	INIT_WORK(&(_work)->work, (_func))

#define INIT_RCU_WORK_ONSTACK(_work, _func)				\
	INIT_WORK_ONSTACK(&(_work)->work, (_func))

/**
 * work_pending - Find out whether a work item is currently pending
 * @work: The work item in question
 */
#define work_pending(work) \
	test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))

/**
 * delayed_work_pending - Find out whether a delayable work item is currently
 * pending
 * @w: The work item in question
 */
#define delayed_work_pending(w) \
	work_pending(&(w)->work)

/*
 * Workqueue flags and constants.  For details, please refer to
 * Documentation/core-api/workqueue.rst.
 */
enum {
	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
	WQ_FREEZABLE		= 1 << 2, /* freeze during suspend */
	WQ_MEM_RECLAIM		= 1 << 3, /* may be used for memory reclaim */
	WQ_HIGHPRI		= 1 << 4, /* high priority */
	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu intensive workqueue */
	WQ_SYSFS		= 1 << 6, /* visible in sysfs, see wq_sysfs_register() */

	/*
	 * Per-cpu workqueues are generally preferred because they tend to
	 * show better performance thanks to cache locality.  Per-cpu
	 * workqueues exclude the scheduler from choosing the CPU to
	 * execute the worker threads, which has an unfortunate side effect
	 * of increasing power consumption.
	 *
	 * The scheduler considers a CPU idle if it doesn't have any task
	 * to execute and tries to keep idle cores idle to conserve power;
	 * however, for example, a per-cpu work item scheduled from an
	 * interrupt handler on an idle CPU will force the scheduler to
	 * execute the work item on that CPU breaking the idleness, which in
	 * turn may lead to more scheduling choices which are sub-optimal
	 * in terms of power consumption.
	 *
	 * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
	 * but become unbound if workqueue.power_efficient kernel param is
	 * specified.  Per-cpu workqueues which are identified to
	 * contribute significantly to power-consumption are identified and
	 * marked with this flag and enabling the power_efficient mode
	 * leads to noticeable power saving at the cost of small
	 * performance disadvantage.
	 *
	 * http://thread.gmane.org/gmane.linux.kernel/1480396
	 */
	WQ_POWER_EFFICIENT	= 1 << 7,

	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
	__WQ_ORDERED_EXPLICIT	= 1 << 19, /* internal: alloc_ordered_workqueue() */

	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
};

/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
#define WQ_UNBOUND_MAX_ACTIVE	\
	max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU)

/*
 * System-wide workqueues which are always present.
 *
 * system_wq is the one used by schedule[_delayed]_work[_on]().
 * Multi-CPU multi-threaded.  There are users which expect relatively
 * short queue flush time.  Don't queue works which can run for too
 * long.
 *
 * system_highpri_wq is similar to system_wq but for work items which
 * require WQ_HIGHPRI.
 *
 * system_long_wq is similar to system_wq but may host long running
 * works.  Queue flushing might take relatively long.
 *
 * system_unbound_wq is unbound workqueue.  Workers are not bound to
 * any specific CPU, not concurrency managed, and all queued works are
 * executed immediately as long as max_active limit is not reached and
 * resources are available.
 *
 * system_freezable_wq is equivalent to system_wq except that it's
 * freezable.
 *
 * *_power_efficient_wq are inclined towards saving power and converted
 * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
 * they are same as their non-power-efficient counterparts - e.g.
 * system_power_efficient_wq is identical to system_wq if
 * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
 */
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_highpri_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;

/**
 * alloc_workqueue - allocate a workqueue
 * @fmt: printf format for the name of the workqueue
 * @flags: WQ_* flags
 * @max_active: max in-flight work items, 0 for default
 * remaining args: args for @fmt
 *
 * Allocate a workqueue with the specified parameters.  For detailed
 * information on WQ_* flags, please refer to
 * Documentation/core-api/workqueue.rst.
 *
 * RETURNS:
 * Pointer to the allocated workqueue on success, %NULL on failure.
 */
struct workqueue_struct *alloc_workqueue(const char *fmt,
					 unsigned int flags,
					 int max_active, ...);

/**
 * alloc_ordered_workqueue - allocate an ordered workqueue
 * @fmt: printf format for the name of the workqueue
 * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
 * @args...: args for @fmt
 *
 * Allocate an ordered workqueue.  An ordered workqueue executes at
 * most one work item at any given time in the queued order.  They are
 * implemented as unbound workqueues with @max_active of one.
 *
 * RETURNS:
 * Pointer to the allocated workqueue on success, %NULL on failure.
 */
#define alloc_ordered_workqueue(fmt, flags, args...)			\
	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED |		\
			__WQ_ORDERED_EXPLICIT | (flags), 1, ##args)

/*
 * Legacy creation helpers.  Each expands to an alloc_workqueue() /
 * alloc_ordered_workqueue() call with __WQ_LEGACY | WQ_MEM_RECLAIM set
 * and a max_active of 1.
 */
#define create_workqueue(name)						\
	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
#define create_freezable_workqueue(name)				\
	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
			WQ_MEM_RECLAIM, 1, (name))
#define create_singlethread_workqueue(name)				\
	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)

extern void destroy_workqueue(struct workqueue_struct *wq);

struct workqueue_attrs *alloc_workqueue_attrs(void);
void free_workqueue_attrs(struct workqueue_attrs *attrs);
int apply_workqueue_attrs(struct workqueue_struct *wq,
			  const struct workqueue_attrs *attrs);
int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);

extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
			struct work_struct *work);
extern bool queue_work_node(int node, struct workqueue_struct *wq,
			    struct work_struct *work);
extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *work, unsigned long delay);
extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay);
extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);

extern void flush_workqueue(struct workqueue_struct *wq);
extern void drain_workqueue(struct workqueue_struct *wq);

extern int schedule_on_each_cpu(work_func_t func);

int execute_in_process_context(work_func_t fn, struct execute_work *);

extern bool flush_work(struct work_struct *work);
extern bool cancel_work_sync(struct work_struct *work);

extern bool flush_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work_sync(struct delayed_work *dwork);

extern bool flush_rcu_work(struct rcu_work *rwork);

extern void workqueue_set_max_active(struct workqueue_struct *wq,
				     int max_active);
extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
extern void print_worker_info(const char *log_lvl, struct task_struct *task);
extern void show_workqueue_state(void);
extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 *
 * Memory-ordering properties:  If it returns %true, guarantees that all stores
 * preceding the call to queue_work() in the program order will be visible from
 * the CPU which will execute @work by the time such work executes, e.g.,
 *
 * { x is initially 0 }
 *
 *   CPU0				CPU1
 *
 *   WRITE_ONCE(x, 1);			[ @work is being executed ]
 *   r0 = queue_work(wq, work);		  r1 = READ_ONCE(x);
 *
 * Forbids: r0 == true && r1 == 0
 */
static inline bool queue_work(struct workqueue_struct *wq,
			      struct work_struct *work)
{
	return queue_work_on(WORK_CPU_UNBOUND, wq, work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Equivalent to queue_delayed_work_on() but tries to use the local CPU.
 */
static inline bool queue_delayed_work(struct workqueue_struct *wq,
				      struct delayed_work *dwork,
				      unsigned long delay)
{
	return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
}

/**
 * mod_delayed_work - modify delay of or queue a delayed work
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * mod_delayed_work_on() on local CPU.
 */
static inline bool mod_delayed_work(struct workqueue_struct *wq,
				    struct delayed_work *dwork,
				    unsigned long delay)
{
	return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
}

/**
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu
 */
static inline bool schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, system_wq, work);
}

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns %false if @work was already on the kernel-global workqueue and
 * %true otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 *
 * Shares the same memory-ordering properties of queue_work(), cf. the
 * DocBook header of queue_work().
 */
static inline bool schedule_work(struct work_struct *work)
{
	return queue_work(system_wq, work);
}

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
static inline void flush_scheduled_work(void)
{
	flush_workqueue(system_wq);
}

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
					    unsigned long delay)
{
	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
}

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
static inline bool schedule_delayed_work(struct delayed_work *dwork,
					 unsigned long delay)
{
	return queue_delayed_work(system_wq, dwork, delay);
}

/*
 * Without CONFIG_SMP both helpers simply call fn(arg) in place and ignore
 * @cpu; with it they are defined out of line.
 */
#ifndef CONFIG_SMP
static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
	return fn(arg);
}
static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
	return fn(arg);
}
#else
long work_on_cpu(int cpu, long (*fn)(void *), void *arg);
long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg);
#endif /* CONFIG_SMP */

#ifdef CONFIG_FREEZER
extern void freeze_workqueues_begin(void);
extern bool freeze_workqueues_busy(void);
extern void thaw_workqueues(void);
#endif /* CONFIG_FREEZER */

/* Without CONFIG_SYSFS registration is a stub that returns 0. */
#ifdef CONFIG_SYSFS
int workqueue_sysfs_register(struct workqueue_struct *wq);
#else	/* CONFIG_SYSFS */
static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
{ return 0; }
#endif	/* CONFIG_SYSFS */

/* Without CONFIG_WQ_WATCHDOG touching the watchdog is an empty stub. */
#ifdef CONFIG_WQ_WATCHDOG
void wq_watchdog_touch(int cpu);
#else	/* CONFIG_WQ_WATCHDOG */
static inline void wq_watchdog_touch(int cpu) { }
#endif	/* CONFIG_WQ_WATCHDOG */

#ifdef CONFIG_SMP
int workqueue_prepare_cpu(unsigned int cpu);
int workqueue_online_cpu(unsigned int cpu);
int workqueue_offline_cpu(unsigned int cpu);
#endif

void __init workqueue_init_early(void);
void __init workqueue_init(void);

#endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H #include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include <asm/asm.h> #include <linux/bitops.h> enum cpuid_leafs { CPUID_1_EDX = 0, CPUID_8000_0001_EDX, CPUID_8086_0001_EDX, CPUID_LNX_1, CPUID_1_ECX, CPUID_C000_0001_EDX, CPUID_8000_0001_ECX, CPUID_LNX_2, CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, CPUID_LNX_4, CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] #else #define X86_CAP_FMT "%d:%d" #define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) #endif /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. 
*/ extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits * (selected with (bit>>5) give us the word number and the low 5 * bits give us the bit/feature number inside the word. * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can * see if it is set in the mask word. */ #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) /* * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all * header macros which use NCAPINTS need to be changed. The duplicated macro * use causes the compiler to issue errors for all headers so that all usage * sites can be corrected. */ #define REQUIRED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ 
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, \ (unsigned long __percpu *)&cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel * infrastructure to be used. It may *not* directly test the CPU * itself. Use the cpu_has() family if you want true runtime * testing of CPU features, like in hypervisor code where you are * supporting a possible guest feature where host support for it * is not relevant. 
*/ #define cpu_feature_enabled(bit) \ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) #if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) /* * Workaround for the sake of BPF compilation which utilizes kernel * headers, but clang does not support ASM GOTO and fails the build. */ #ifndef __BPF_TRACING__ #warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" #endif #define static_cpu_has(bit) boot_cpu_has(bit) #else /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use * static_cpu_has() only in fast paths, where every cycle counts. Which * means that the boot_cpu_has() variant is already fast enough for the * majority of cases and you should stick to using it as it is generally * only two instructions: a RIP-relative MOV and a TEST. 
*/
/*
 * _static_cpu_has - asm-goto implementation behind static_cpu_has().
 * @bit: feature bit number; used as an immediate ("i") operand, so it must
 *       be a compile-time constant at every (inlined) call site.
 *
 * Layout of the emitted code, as written below:
 *  - label 1: a "jmp 6f" followed by padding (.skip ... 0x90) up to label 3;
 *  - a first .altinstructions record keyed on X86_FEATURE_ALWAYS whose
 *    replacement (labels 4-5, in .altinstr_replacement) is "jmp t_no";
 *  - a second .altinstructions record keyed on @bit with a zero-length
 *    replacement;
 *  - label 6 (in .altinstr_aux): a testb of bit (bit & 7) in byte (bit >> 3)
 *    of boot_cpu_data.x86_capability, branching to t_yes or t_no.
 *
 * NOTE(review): presumably, until the alternatives are applied the initial
 * jmp reaches the dynamic test at label 6; afterwards the site is either
 * "jmp t_no" or, when the feature is present, patched so execution falls
 * through to t_yes. The patching logic lives outside this chunk -- confirm.
 *
 * Return: true via the t_yes label, false via the t_no label.
 */
static __always_inline bool _static_cpu_has(u16 bit)
{
	asm_volatile_goto("1: jmp 6f\n"
		 "2:\n"
		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
			 "((5f-4f) - (2b-1b)),0x90\n"
		 "3:\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"		/* src offset */
		 " .long 4f - .\n"		/* repl offset */
		 " .word %P[always]\n"		/* always replace */
		 " .byte 3b - 1b\n"		/* src len */
		 " .byte 5f - 4f\n"		/* repl len */
		 " .byte 3b - 2b\n"		/* pad len */
		 ".previous\n"
		 ".section .altinstr_replacement,\"ax\"\n"
		 "4: jmp %l[t_no]\n"
		 "5:\n"
		 ".previous\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"		/* src offset */
		 " .long 0\n"			/* no replacement */
		 " .word %P[feature]\n"		/* feature bit */
		 " .byte 3b - 1b\n"		/* src len */
		 " .byte 0\n"			/* repl len */
		 " .byte 0\n"			/* pad len */
		 ".previous\n"
		 ".section .altinstr_aux,\"ax\"\n"
		 "6:\n"
		 " testb %[bitnum],%[cap_byte]\n"
		 " jnz %l[t_yes]\n"
		 " jmp %l[t_no]\n"
		 ".previous\n"
		 : : [feature]  "i" (bit),
		     [always]   "i" (X86_FEATURE_ALWAYS),
		     [bitnum]   "i" (1 << (bit & 7)),
		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
		 : : t_yes, t_no);
t_yes:
	return true;
t_no:
	return false;
}

/*
 * If boot_cpu_has(bit) folds to a compile-time constant, use that value
 * directly; only otherwise emit the patched _static_cpu_has() sequence.
 */
#define static_cpu_has(bit) \
( \
	__builtin_constant_p(boot_cpu_has(bit)) ? \
		boot_cpu_has(bit) : \
		_static_cpu_has(bit) \
)
#endif

/* "Bug" accessors: thin aliases of the capability accessors. */
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))

#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit))

/* NCAPINTS capability words times 32 bits each. */
#define MAX_CPU_FEATURES (NCAPINTS * 32)
#define cpu_have_feature boot_cpu_has

/*
 * Format string and matching argument list: three 4-digit upper-case hex
 * fields filled from boot_cpu_data.x86_vendor, .x86 and .x86_model.
 */
#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
	boot_cpu_data.x86_model

#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_X86_CPUFEATURE_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 
1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 
1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 
2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 
3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 
3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 
3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 
4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 
4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 
5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 
5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 
5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 
6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 // SPDX-License-Identifier: GPL-2.0-or-later /* * Routines having to do with the 'struct sk_buff' memory handlers. * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. * Dave Platt : Interrupt stacking fix. * Richard Kooijman : Timestamp fixes. * Alan Cox : Changed buffer format. * Alan Cox : destructor hook for AF_UNIX etc. * Linus Torvalds : Better skb_clone. * Alan Cox : Added skb_copy. * Alan Cox : Added all the changed routines Linus * only put in the headers * Ray VanTassle : Fixed --skb->lock in free * Alan Cox : skb_copy copy arp field * Andi Kleen : slabified it. * Robert Olsson : Removed skb_head_pool * * NOTE: * The __skb_ routines should be called with interrupts * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). 
*/ /* * The functions in this file will not compile correctly with gcc 2.4.x */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> #endif #include <linux/string.h> #include <linux/skbuff.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/if_vlan.h> #include <linux/mpls.h> #include <net/protocol.h> #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> #include "datagram.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support * @skb: buffer * @sz: size * @addr: address * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). * Keep out of line to prevent kernel bloat. * __builtin_return_address is not used because it is not always reliable. 
*/
static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
		      const char msg[])
{
	/* Log the buffer geometry and the device name (or "<NULL>"), then BUG(). */
	pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
		 msg, addr, skb->len, sz, skb->head, skb->data,
		 (unsigned long)skb->tail, (unsigned long)skb->end,
		 skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/* Forward to skb_panic(), passing this function's own name as @msg. */
static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

/* Forward to skb_panic(), passing this function's own name as @msg. */
static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

/*
 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
 * the caller if emergency pfmemalloc reserves are being used. If it is and
 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
 * may be used. Otherwise, the packet data may be discarded until enough
 * memory is free
 */
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
	 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)

/*
 * Two-step allocation behind kmalloc_reserve().
 *
 * @size:	number of bytes to allocate
 * @flags:	gfp flags supplied by the caller
 * @node:	NUMA node passed through to kmalloc_node_track_caller()
 * @ip:		supplied by the kmalloc_reserve() macro as _RET_IP_; not
 *		referenced in this function body
 * @pfmemalloc:	optional out-parameter; when non-NULL it is set to true only
 *		if the second (reserve-enabled) attempt was made
 *
 * Returns the allocated object, or NULL if no attempt succeeded.
 */
static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
			       unsigned long ip, bool *pfmemalloc)
{
	void *obj;
	bool ret_pfmemalloc = false;

	/*
	 * Try a regular allocation, when that fails and we're not entitled
	 * to the reserves, fail.
	 */
	obj = kmalloc_node_track_caller(size,
					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
					node);
	if (obj || !(gfp_pfmemalloc_allowed(flags)))
		goto out;

	/* Try again but now we are using pfmemalloc reserves */
	ret_pfmemalloc = true;
	obj = kmalloc_node_track_caller(size, flags, node);

out:
	/* The flag is reported even when the second attempt also failed. */
	if (pfmemalloc)
		*pfmemalloc = ret_pfmemalloc;

	return obj;
}

/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	__alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
 *		instead of head cache and allocate a cloned (child) skb.
*		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
 *		allocations in case the data is required for writeback
 *	@node: numa node to allocate memory on
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of at least @size bytes. The object has a reference count
 *	of one. The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int flags, int node)
{
	struct kmem_cache *cache;
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	u8 *data;
	bool pfmemalloc;

	/* fclone requests come from the fclone cache, everything else from the head cache */
	cache = (flags & SKB_ALLOC_FCLONE)
		? skbuff_fclone_cache : skbuff_head_cache;

	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
		gfp_mask |= __GFP_MEMALLOC;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;
	prefetchw(skb);

	/* We do our best to align skb_shared_info on a separate cache
	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
	 * Both skb->head and skb_shared_info are cache line aligned.
	 */
	size = SKB_DATA_ALIGN(size);
	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
	if (!data)
		goto nodata;
	/* kmalloc(size) might give us more room than requested.
	 * Put skb_shared_info exactly at the end of allocated zone,
	 * to allow max possible filling before reallocation.
	 */
	size = SKB_WITH_OVERHEAD(ksize(data));
	prefetchw(data + size);

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	/* Account for allocated memory : skb + skb->head */
	skb->truesize = SKB_TRUESIZE(size);
	skb->pfmemalloc = pfmemalloc;
	refcount_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	/* all-ones marks the mac and transport header offsets as not set */
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);

	if (flags & SKB_ALLOC_FCLONE) {
		struct sk_buff_fclones *fclones;

		/* skb is the skb1 member of the fclone pair; tag both halves */
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		skb->fclone = SKB_FCLONE_ORIG;
		refcount_set(&fclones->fclone_ref, 1);

		fclones->skb2.fclone = SKB_FCLONE_CLONE;
	}

	skb_set_kcov_handle(skb, kcov_common_handle());

out:
	return skb;
nodata:
	/* data allocation failed: give the head back to the cache it came from */
	kmem_cache_free(cache, skb);
	skb = NULL;
	goto out;
}
EXPORT_SYMBOL(__alloc_skb);

/*
 * Initialise @skb so that it describes the caller-provided buffer @data.
 * When @frag_size is zero the usable size is taken from ksize(@data).
 * The room for struct skb_shared_info is subtracted from that size.
 * Caller must provide SKB that is memset cleared.
 * Returns @skb.
 */
static struct sk_buff *__build_skb_around(struct sk_buff *skb,
					  void *data, unsigned int frag_size)
{
	struct skb_shared_info *shinfo;
	unsigned int size = frag_size ? : ksize(data);

	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	/* Assumes caller memset cleared SKB */
	skb->truesize = SKB_TRUESIZE(size);
	refcount_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);

	skb_set_kcov_handle(skb, kcov_common_handle());

	return skb;
}

/**
 * __build_skb - build a network buffer
 * @data: data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 *
 * Allocate a new &sk_buff.
Caller provides space holding head and
 * skb_shared_info. @data must have been allocated by kmalloc() only if
 * @frag_size is 0, otherwise data should come from the page allocator
 *  or vmalloc()
 * The return is the new skb buffer.
 * On a failure the return is %NULL, and @data is not freed.
 * Notes :
 *  Before IO, driver allocates only data buffer where NIC put incoming frame
 *  Driver should add room at head (NET_SKB_PAD) and
 *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
 *  After IO, driver calls build_skb(), to allocate sk_buff and populate it
 *  before giving packet to stack.
 *  RX rings only contain data buffers, not full skbs.
 */
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
	struct sk_buff *skb;

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	/* __build_skb_around() expects the leading fields to be cleared */
	memset(skb, 0, offsetof(struct sk_buff, tail));

	return __build_skb_around(skb, data, frag_size);
}

/* build_skb() is wrapper over __build_skb(), that specifically
 * takes care of skb->head_frag and skb->pfmemalloc
 * This means that if @frag_size is not zero, then @data must be backed
 * by a page fragment, not kmalloc() or vmalloc()
 */
struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
	struct sk_buff *skb = __build_skb(data, frag_size);

	if (skb && frag_size) {
		skb->head_frag = 1;
		if (page_is_pfmemalloc(virt_to_head_page(data)))
			skb->pfmemalloc = 1;
	}
	return skb;
}
EXPORT_SYMBOL(build_skb);

/**
 * build_skb_around - build a network buffer around provided skb
 * @skb: sk_buff provided by caller, must be memset cleared
 * @data:  data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 *
 * Returns @skb, or %NULL when @skb is %NULL.
 */
struct sk_buff *build_skb_around(struct sk_buff *skb,
				 void *data, unsigned int frag_size)
{
	if (unlikely(!skb))
		return NULL;

	skb = __build_skb_around(skb, data, frag_size);

	if (skb && frag_size) {
		skb->head_frag = 1;
		if (page_is_pfmemalloc(virt_to_head_page(data)))
			skb->pfmemalloc = 1;
	}
	return skb;
}
EXPORT_SYMBOL(build_skb_around);

/* Capacity of the per-CPU array of sk_buff heads awaiting a bulk free */
#define NAPI_SKB_CACHE_SIZE	64

/*
 * Per-CPU state: a page fragment cache plus an array of deferred sk_buff
 * heads (skb_count entries of skb_cache[] are in use).
 */
struct napi_alloc_cache {
	struct page_frag_cache page;
	unsigned int skb_count;
	void *skb_cache[NAPI_SKB_CACHE_SIZE];
};

static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);

/* Allocate @fragsz bytes from this CPU's napi_alloc_cache page cache. */
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
}

/*
 * Round @fragsz up with SKB_DATA_ALIGN() and allocate it with GFP_ATOMIC
 * from this CPU's napi_alloc_cache.
 */
void *napi_alloc_frag(unsigned int fragsz)
{
	fragsz = SKB_DATA_ALIGN(fragsz);

	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);

/**
 * netdev_alloc_frag - allocate a page fragment
 * @fragsz: fragment size
 *
 * Allocates a frag from a page for receive buffer.
 * Uses GFP_ATOMIC allocations.
 */
void *netdev_alloc_frag(unsigned int fragsz)
{
	struct page_frag_cache *nc;
	void *data;

	fragsz = SKB_DATA_ALIGN(fragsz);
	if (in_irq() || irqs_disabled()) {
		/* hard-irq or irqs-off context uses the netdev_alloc_cache */
		nc = this_cpu_ptr(&netdev_alloc_cache);
		data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
	} else {
		/* otherwise use the NAPI cache with bottom halves disabled */
		local_bh_disable();
		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
		local_bh_enable();
	}
	return data;
}
EXPORT_SYMBOL(netdev_alloc_frag);

/**
 *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
 *	@dev: network device to receive on
 *	@len: length to allocate
 *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 *	Allocate a new &sk_buff and assign it a usage count of one. The
 *	buffer has NET_SKB_PAD headroom built in. Users should allocate
 *	the headroom they think they need without accounting for the
 *	built in space. The built in space is used for optimisations.
 *
 *	%NULL is returned if there is no free memory.
 */
struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
				   gfp_t gfp_mask)
{
	struct page_frag_cache *nc;
	struct sk_buff *skb;
	bool pfmemalloc;
	void *data;

	len += NET_SKB_PAD;

	/* If requested length is either too small or too big,
	 * we use kmalloc() for skb->head allocation.
	 */
	if (len <= SKB_WITH_OVERHEAD(1024) ||
	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
		if (!skb)
			goto skb_fail;
		goto skb_success;
	}

	/* page-fragment path: make room for skb_shared_info behind the data */
	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	if (in_irq() || irqs_disabled()) {
		nc = this_cpu_ptr(&netdev_alloc_cache);
		data = page_frag_alloc(nc, len, gfp_mask);
		pfmemalloc = nc->pfmemalloc;
	} else {
		local_bh_disable();
		nc = this_cpu_ptr(&napi_alloc_cache.page);
		data = page_frag_alloc(nc, len, gfp_mask);
		pfmemalloc = nc->pfmemalloc;
		local_bh_enable();
	}

	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		/* no sk_buff head available: release the fragment again */
		skb_free_frag(data);
		return NULL;
	}

	if (pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

skb_success:
	skb_reserve(skb, NET_SKB_PAD);
	skb->dev = dev;

skb_fail:
	return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);

/**
 *	__napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
 *	@napi: napi instance this buffer was allocated for
 *	@len: length to allocate
 *	@gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
 *
 *	Allocate a new sk_buff for use in NAPI receive.  This buffer will
 *	attempt to allocate the head from a special reserved region used
 *	only for NAPI Rx allocation.  By doing this we can save several
 *	CPU cycles by avoiding having to disable and re-enable IRQs.
 *
 *	%NULL is returned if there is no free memory.
 */
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
				 gfp_t gfp_mask)
{
	struct napi_alloc_cache *nc;
	struct sk_buff *skb;
	void *data;

	len += NET_SKB_PAD + NET_IP_ALIGN;

	/* If requested length is either too small or too big,
	 * we use kmalloc() for skb->head allocation.
	 */
	if (len <= SKB_WITH_OVERHEAD(1024) ||
	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
		if (!skb)
			goto skb_fail;
		goto skb_success;
	}

	nc = this_cpu_ptr(&napi_alloc_cache);
	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	data = page_frag_alloc(&nc->page, len, gfp_mask);
	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		skb_free_frag(data);
		return NULL;
	}

	if (nc->page.pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

skb_success:
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	skb->dev = napi->dev;

skb_fail:
	return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);

/*
 * Fill page descriptor @i of @skb with (@page, @off, @size) and account for
 * it: len and data_len grow by @size, truesize grows by @truesize.
 */
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
		     int size, unsigned int truesize)
{
	skb_fill_page_desc(skb, i, page, off, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_add_rx_frag);

/*
 * Grow the existing fragment @i of @skb by @size bytes and update len,
 * data_len and truesize accordingly.
 */
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
			  unsigned int truesize)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

	skb_frag_size_add(frag, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_coalesce_rx_frag);

/* Free the skb list at *@listp with kfree_skb_list() and clear the pointer. */
static void skb_drop_list(struct sk_buff **listp)
{
	kfree_skb_list(*listp);
	*listp = NULL;
}

/* Drop the frag_list hanging off @skb's shared info. */
static inline void skb_drop_fraglist(struct sk_buff *skb)
{
	skb_drop_list(&skb_shinfo(skb)->frag_list);
}

/* Call skb_get() on every skb visited by skb_walk_frags() for @skb. */
static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	skb_walk_frags(skb, list)
		skb_get(list);
}

/* Release skb->head: page-fragment heads via skb_free_frag(), others via kfree(). */
static void skb_free_head(struct sk_buff *skb)
{
	unsigned char *head = skb->head;

	if (skb->head_frag)
		skb_free_frag(head);
	else
		kfree(head);
}

/*
 * Release the data area of @skb: page frags, frag_list, zerocopy state and
 * the head buffer. For a cloned skb nothing is released unless the dataref
 * subtraction below brings the counter to zero.
 */
static void skb_release_data(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	/* a nohdr skb subtracts (1 << SKB_DATAREF_SHIFT) + 1, others subtract 1 */
	if (skb->cloned &&
	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			      &shinfo->dataref))
		return;

	for (i = 0; i < shinfo->nr_frags; i++)
		__skb_frag_unref(&shinfo->frags[i]);

	if (shinfo->frag_list)
		kfree_skb_list(shinfo->frag_list);

	skb_zcopy_clear(skb, true);
	skb_free_head(skb);
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
static void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff_fclones *fclones;

	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		kmem_cache_free(skbuff_head_cache, skb);
		return;

	case SKB_FCLONE_ORIG:
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		/* We usually free the clone (TX completion) before original skb
		 * This test would have no chance to be true for the clone,
		 * while here, branch prediction will be good.
		 */
		if (refcount_read(&fclones->fclone_ref) == 1)
			goto fastpath;
		break;

	default: /* SKB_FCLONE_CLONE */
		fclones = container_of(skb, struct sk_buff_fclones, skb2);
		break;
	}
	/* the pair is freed only when the last fclone_ref is dropped */
	if (!refcount_dec_and_test(&fclones->fclone_ref))
		return;
fastpath:
	kmem_cache_free(skbuff_fclone_cache, fclones);
}

/*
 * Drop the state attached to the sk_buff head: conntrack reference, dst,
 * the destructor callback (if any) and extensions.
 */
void skb_release_head_state(struct sk_buff *skb)
{
	nf_reset_ct(skb);
	skb_dst_drop(skb);
	if (skb->destructor) {
		/* warn if a destructor is about to run in hard-irq context */
		WARN_ON(in_irq());
		skb->destructor(skb);
	}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	nf_conntrack_put(skb_nfct(skb));
#endif
	skb_ext_put(skb);
}

/* Free everything but the sk_buff shell. */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);
	if (likely(skb->head))
		skb_release_data(skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */

void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);

/**
 *	kfree_skb - free an sk_buff
 *	@skb: buffer to free
 *
 *	Drop a reference to the buffer and free it if the usage count has
 *	hit zero.
*/ void kfree_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb); void kfree_skb_list(struct sk_buff *segs) { while (segs) { struct sk_buff *next = segs->next; kfree_skb(segs); segs = next; } } EXPORT_SYMBOL(kfree_skb_list); /* Dump skb information and contents. * * Must only be called from net_ratelimit()-ed paths. * * Dumps whole packets if full_pkt, only headers otherwise. */ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) { struct skb_shared_info *sh = skb_shinfo(skb); struct net_device *dev = skb->dev; struct sock *sk = skb->sk; struct sk_buff *list_skb; bool has_mac, has_trans; int headroom, tailroom; int i, len, seg_len; if (full_pkt) len = skb->len; else len = min_t(int, skb->len, MAX_HEADER + 128); headroom = skb_headroom(skb); tailroom = skb_tailroom(skb); has_mac = skb_mac_header_was_set(skb); has_trans = skb_transport_header_was_set(skb); printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" "mac=(%d,%d) net=(%d,%d) trans=%d\n" "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n" "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n", level, skb->len, headroom, skb_headlen(skb), tailroom, has_mac ? skb->mac_header : -1, has_mac ? skb_mac_header_len(skb) : -1, skb->network_header, has_trans ? skb_network_header_len(skb) : -1, has_trans ? 
skb->transport_header : -1, sh->tx_flags, sh->nr_frags, sh->gso_size, sh->gso_type, sh->gso_segs, skb->csum, skb->ip_summed, skb->csum_complete_sw, skb->csum_valid, skb->csum_level, skb->hash, skb->sw_hash, skb->l4_hash, ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", level, sk->sk_family, sk->sk_type, sk->sk_protocol); if (full_pkt && headroom) print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb->head, headroom, false); seg_len = min_t(int, skb_headlen(skb), len); if (seg_len) print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, seg_len, false); len -= seg_len; if (full_pkt && tailroom) print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb_tail_pointer(skb), tailroom, false); for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); vaddr = kmap_atomic(p); print_hex_dump(level, "skb frag: ", DUMP_PREFIX_OFFSET, 16, 1, vaddr + p_off, seg_len, false); kunmap_atomic(vaddr); len -= seg_len; if (!len) break; } } if (full_pkt && skb_has_frag_list(skb)) { printk("skb fraglist:\n"); skb_walk_frags(skb, list_skb) skb_dump(level, list_skb, true); } } EXPORT_SYMBOL(skb_dump); /** * skb_tx_error - report an sk_buff xmit error * @skb: buffer that triggered an error * * Report xmit error if a device callback is tracking this skb. * skb must be freed afterwards. 
*/ void skb_tx_error(struct sk_buff *skb) { skb_zcopy_clear(skb, true); } EXPORT_SYMBOL(skb_tx_error); #ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free * * Drop a ref to the buffer and free it if the usage count has hit zero * Functions identically to kfree_skb, but kfree_skb assumes that the frame * is being dropped after a failure and notes that */ void consume_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); #endif /** * consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * * Alike consume_skb(), but this variant assumes that this is the last * skb reference and all the head states have been already dropped */ void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb); skb_release_data(skb); kfree_skbmem(skb); } void __kfree_skb_flush(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); /* flush skb_cache if containing objects */ if (nc->skb_count) { kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, nc->skb_cache); nc->skb_count = 0; } } static inline void _kfree_skb_defer(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); /* drop skb->head and call any destructors for packet */ skb_release_all(skb); /* record skb to CPU local list */ nc->skb_cache[nc->skb_count++] = skb; #ifdef CONFIG_SLUB /* SLUB writes into objects when freeing */ prefetchw(skb); #endif /* flush skb_cache if it is filled */ if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE, nc->skb_cache); nc->skb_count = 0; } } void __kfree_skb_defer(struct sk_buff *skb) { _kfree_skb_defer(skb); } void napi_consume_skb(struct sk_buff *skb, int budget) { /* Zero budget indicate non-NAPI context called us, like netpoll */ if (unlikely(!budget)) { dev_consume_skb_any(skb); return; } if (!skb_unref(skb)) return; /* if reaching 
here SKB is ready to free */ trace_consume_skb(skb); /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } _kfree_skb_defer(skb); } EXPORT_SYMBOL(napi_consume_skb); /* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ offsetof(struct sk_buff, headers_start)); \ BUILD_BUG_ON(offsetof(struct sk_buff, field) > \ offsetof(struct sk_buff, headers_end)); \ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; /* We do not copy old->sk */ new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in headers_start/headers_end section * It is not yet because we do not want to have a 16 bit hole */ new->queue_mapping = old->queue_mapping; memcpy(&new->headers_start, &old->headers_start, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); CHECK_SKB_FIELD(protocol); CHECK_SKB_FIELD(csum); CHECK_SKB_FIELD(hash); CHECK_SKB_FIELD(priority); CHECK_SKB_FIELD(skb_iif); CHECK_SKB_FIELD(vlan_proto); CHECK_SKB_FIELD(vlan_tci); CHECK_SKB_FIELD(transport_header); CHECK_SKB_FIELD(network_header); CHECK_SKB_FIELD(mac_header); CHECK_SKB_FIELD(inner_protocol); CHECK_SKB_FIELD(inner_transport_header); CHECK_SKB_FIELD(inner_network_header); CHECK_SKB_FIELD(inner_mac_header); CHECK_SKB_FIELD(mark); #ifdef CONFIG_NETWORK_SECMARK CHECK_SKB_FIELD(secmark); #endif #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif #ifdef CONFIG_XPS CHECK_SKB_FIELD(sender_cpu); #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #endif } /* * You should not add any new code to this function. Add it to * __copy_skb_header above instead. 
*/
static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
{
/* C(x) copies field x from skb to n */
#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->sk = NULL;
	__copy_skb_header(n, skb);

	C(len);
	C(data_len);
	C(mac_len);
	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
	n->cloned = 1;
	n->nohdr = 0;
	n->peeked = 0;
	C(pfmemalloc);
	n->destructor = NULL;
	C(tail);
	C(end);
	C(head);
	C(head_frag);
	C(data);
	C(truesize);
	refcount_set(&n->users, 1);

	/* both skbs now reference the same data area */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
#undef C
}

/**
 * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
 * @first: first sk_buff of the msg
 *
 * Returns the new wrapper skb, or %NULL if the allocation failed.
 */
struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
{
	struct sk_buff *n;

	n = alloc_skb(0, GFP_ATOMIC);
	if (!n)
		return NULL;

	/* the wrapper has no linear data: all of first->len is counted as data_len */
	n->len = first->len;
	n->data_len = first->len;
	n->truesize = first->truesize;

	skb_shinfo(n)->frag_list = first;

	__copy_skb_header(n, first);
	n->destructor = NULL;

	return n;
}
EXPORT_SYMBOL_GPL(alloc_skb_for_msg);

/**
 *	skb_morph	-	morph one skb into another
 *	@dst: the skb to receive the contents
 *	@src: the skb to supply the contents
 *
 *	This is identical to skb_clone except that the target skb is
 *	supplied by the user.
 *
 *	The target skb is returned upon exit.
 */
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
	skb_release_all(dst);
	return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);

/*
 * Charge the pages backing @size bytes against the user's locked_vm,
 * bounded by RLIMIT_MEMLOCK. Nothing is charged when the caller has
 * CAP_IPC_LOCK or @size is zero.
 *
 * Returns 0 on success, -ENOBUFS if the limit would be exceeded.
 */
int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
	unsigned long max_pg, num_pg, new_pg, old_pg;
	struct user_struct *user;

	if (capable(CAP_IPC_LOCK) || !size)
		return 0;

	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case */
	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	user = mmp->user ? : current_user();

	/* lockless update: retry until the cmpxchg sees an unchanged counter */
	do {
		old_pg = atomic_long_read(&user->locked_vm);
		new_pg = old_pg + num_pg;
		if (new_pg > max_pg)
			return -ENOBUFS;
	} while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
		 old_pg);

	/* first charge takes a reference on the user; later ones only add pages */
	if (!mmp->user) {
		mmp->user = get_uid(user);
		mmp->num_pg = num_pg;
	} else {
		mmp->num_pg += num_pg;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mm_account_pinned_pages);

/*
 * Undo mm_account_pinned_pages(): subtract mmp->num_pg from the user's
 * locked_vm and drop the user reference. No-op when mmp->user is unset.
 */
void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
	if (mmp->user) {
		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
		free_uid(mmp->user);
	}
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);

/*
 * Allocate a zerocopy ubuf_info for @size bytes on socket @sk. The
 * ubuf_info lives in the cb[] area of a zero-length skb obtained from
 * sock_omalloc(). On success a reference on @sk is taken (sock_hold()).
 *
 * Returns the new ubuf_info, or NULL if the skb allocation or the
 * pinned-page accounting failed.
 */
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
	struct ubuf_info *uarg;
	struct sk_buff *skb;

	WARN_ON_ONCE(!in_task());

	skb = sock_omalloc(sk, 0, GFP_KERNEL);
	if (!skb)
		return NULL;

	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
	uarg = (void *)skb->cb;
	uarg->mmp.user = NULL;

	if (mm_account_pinned_pages(&uarg->mmp, size)) {
		kfree_skb(skb);
		return NULL;
	}

	uarg->callback = sock_zerocopy_callback;
	/* id is the value of sk_zckey before the increment */
	uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
	uarg->len = 1;
	uarg->bytelen = size;
	uarg->zerocopy = 1;
	refcount_set(&uarg->refcnt, 1);
	sock_hold(sk);

	return uarg;
}
EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);

/* Map a ubuf_info stored in skb->cb back to its containing sk_buff. */
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
{
	return container_of((void *)uarg, struct sk_buff, cb);
}

/*
 * Extend @uarg to cover @size more bytes when that is possible, otherwise
 * fall back to a fresh sock_zerocopy_alloc().
 *
 * Returns the (possibly new) ubuf_info, or NULL on failure.
 */
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
					struct ubuf_info *uarg)
{
	if (uarg) {
		const u32 byte_limit = 1 << 19;		/* limit to a few TSO */
		u32 bytelen, next;

		/* realloc only when socket is locked (TCP, UDP cork),
		 * so uarg->len and sk_zckey access is serialized
		 */
		if (!sock_owned_by_user(sk)) {
			WARN_ON_ONCE(1);
			return NULL;
		}

		bytelen = uarg->bytelen + size;
		if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
			/* TCP can create new skb to attach new uarg */
			if (sk->sk_type == SOCK_STREAM)
				goto new_alloc;
			return NULL;
		}

		/* extend only if this uarg covers the most recent ids */
		next = (u32)atomic_read(&sk->sk_zckey);
		if ((u32)(uarg->id + uarg->len) == next) {
			if (mm_account_pinned_pages(&uarg->mmp, size))
				return NULL;
			uarg->len++;
			uarg->bytelen = bytelen;
			atomic_set(&sk->sk_zckey, ++next);

			/* no extra ref when appending to datagram (MSG_MORE) */
			if (sk->sk_type == SOCK_STREAM)
				sock_zerocopy_get(uarg);

			return uarg;
		}
	}

new_alloc:
	return sock_zerocopy_alloc(sk, size);
}
EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);

/*
 * Try to merge the id range starting at @lo with @len entries into the
 * notification already stored in @skb (ee_info..ee_data). The merge is
 * refused if the combined length would not fit in 32 bits or if @lo does
 * not directly follow the stored upper bound.
 *
 * Returns true if @skb was extended.
 */
static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
	u32 old_lo, old_hi;
	u64 sum_len;

	old_lo = serr->ee.ee_info;
	old_hi = serr->ee.ee_data;
	sum_len = old_hi - old_lo + 1ULL + len;

	if (sum_len >= (1ULL << 32))
		return false;

	if (lo != old_hi + 1)
		return false;

	serr->ee.ee_data += len;
	return true;
}

/*
 * Completion callback installed by sock_zerocopy_alloc(). Unaccounts the
 * pinned pages and, unless the uarg is empty or the socket is dead, reports
 * the completed id range [id, id + len - 1] on the socket error queue. The
 * range is merged into the queue tail when possible, otherwise queued as a
 * new entry. Finally drops the skb (if not queued) and the socket reference.
 */
void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
{
	struct sk_buff *tail, *skb = skb_from_uarg(uarg);
	struct sock_exterr_skb *serr;
	struct sock *sk = skb->sk;
	struct sk_buff_head *q;
	unsigned long flags;
	u32 lo, hi;
	u16 len;

	mm_unaccount_pinned_pages(&uarg->mmp);

	/* if !len, there was only 1 call, and it was aborted
	 * so do not queue a completion notification
	 */
	if (!uarg->len || sock_flag(sk, SOCK_DEAD))
		goto release;

	len = uarg->len;
	lo = uarg->id;
	hi = uarg->id + len - 1;

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = 0;
	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
	serr->ee.ee_data = hi;
	serr->ee.ee_info = lo;
	if (!success)
		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;

	q = &sk->sk_error_queue;
	spin_lock_irqsave(&q->lock, flags);
	tail = skb_peek_tail(q);
	if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
	    !skb_zerocopy_notify_extend(tail, lo, len)) {
		/* could not merge: the queue now owns skb */
		__skb_queue_tail(q, skb);
		skb = NULL;
	}
	spin_unlock_irqrestore(&q->lock, flags);

	sk->sk_error_report(sk);

release:
	consume_skb(skb);
	sock_put(sk);
}
EXPORT_SYMBOL_GPL(sock_zerocopy_callback);

/*
 * Drop one reference on @uarg (NULL is ignored). On the last reference,
 * invoke uarg->callback if set, otherwise consume the backing skb.
 */
void sock_zerocopy_put(struct ubuf_info *uarg)
{
	if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
		if (uarg->callback)
			uarg->callback(uarg, uarg->zerocopy);
		else
			consume_skb(skb_from_uarg(uarg));
	}
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put);

/*
 * Roll back one zerocopy send on @uarg: decrement sk_zckey and uarg->len,
 * and drop a reference if the caller holds one (@have_uref).
 */
void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
	if (uarg) {
		struct sock *sk = skb_from_uarg(uarg)->sk;

		atomic_dec(&sk->sk_zckey);
		uarg->len--;

		if (have_uref)
			sock_zerocopy_put(uarg);
	}
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);

/* Datagram variant: hand @len bytes of @msg to __zerocopy_sg_from_iter(). */
int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
{
	return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);

/*
 * Stream variant: append up to @len bytes of @msg to @skb and attach @uarg.
 *
 * Returns the number of bytes added, -EEXIST if @skb already carries a
 * different uarg, or the error from __zerocopy_sg_from_iter() after
 * restoring @skb and the iterator to their previous state.
 */
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
			     struct msghdr *msg, int len,
			     struct ubuf_info *uarg)
{
	struct ubuf_info *orig_uarg = skb_zcopy(skb);
	struct iov_iter orig_iter = msg->msg_iter;
	int err, orig_len = skb->len;

	/* An skb can only point to one uarg. This edge case happens when
	 * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
	 */
	if (orig_uarg && uarg != orig_uarg)
		return -EEXIST;

	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
		struct sock *save_sk = skb->sk;

		/* Streams do not free skb on error. Reset to prev state. */
		msg->msg_iter = orig_iter;
		/* skb->sk is set to sk only for the duration of the trim */
		skb->sk = sk;
		___pskb_trim(skb, orig_len);
		skb->sk = save_sk;
		return err;
	}

	skb_zcopy_set(skb, uarg, NULL);
	return skb->len - orig_len;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);

/*
 * Make @nskb share the zerocopy uarg of @orig, if @orig has one. If @nskb
 * already carries a different uarg its user frags are first copied with
 * skb_copy_ubufs().
 *
 * Returns 0 on success, -ENOMEM for a zero @gfp_mask with a zerocopy
 * @nskb, or -EIO if the copy failed.
 */
static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
			      gfp_t gfp_mask)
{
	if (skb_zcopy(orig)) {
		if (skb_zcopy(nskb)) {
			/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
			if (!gfp_mask) {
				WARN_ON_ONCE(1);
				return -ENOMEM;
			}
			if (skb_uarg(nskb) == skb_uarg(orig))
				return 0;
			/* NOTE(review): GFP_ATOMIC is used here rather than gfp_mask */
			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
				return -EIO;
		}
		skb_zcopy_set(nskb, skb_uarg(orig), NULL);
	}
	return 0;
}

/**
 *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
 *	@skb: the skb to modify
 *	@gfp_mask: allocation priority
 *
 *	This must be called on SKBTX_DEV_ZEROCOPY skb.
*	It will copy all frags into kernel and drop the reference
 *	to userspace pages.
 *
 *	If this function is called from an interrupt @gfp_mask must be
 *	%GFP_ATOMIC.
 *
 *	Returns 0 on success, -EINVAL if the skb is shared or cannot be
 *	uncloned, or -ENOMEM on failure to allocate kernel memory to
 *	copy to.
 */
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
	int num_frags = skb_shinfo(skb)->nr_frags;
	struct page *page, *head = NULL;
	int i, new_frags;
	u32 d_off;

	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
		return -EINVAL;

	if (!num_frags)
		goto release;

	/* number of whole pages needed to hold all paged data */
	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < new_frags; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			/* unwind: free every page already on the chain */
			while (head) {
				struct page *next = (struct page *)page_private(head);
				put_page(head);
				head = next;
			}
			return -ENOMEM;
		}
		/* chain the new pages through page_private, newest first */
		set_page_private(page, (unsigned long)head);
		head = page;
	}

	page = head;
	d_off = 0;
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u32 p_off, p_len, copied;
		struct page *p;
		u8 *vaddr;

		skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
				      p, p_off, p_len, copied) {
			u32 copy, done = 0;
			vaddr = kmap_atomic(p);

			while (done < p_len) {
				/* destination page full: advance along the chain */
				if (d_off == PAGE_SIZE) {
					d_off = 0;
					page = (struct page *)page_private(page);
				}
				copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
				memcpy(page_address(page) + d_off,
				       vaddr + p_off + done, copy);
				done += copy;
				d_off += copy;
			}
			kunmap_atomic(vaddr);
		}
	}

	/* skb frags release userspace buffers */
	for (i = 0; i < num_frags; i++)
		skb_frag_unref(skb, i);

	/* skb frags point to kernel buffers */
	for (i = 0; i < new_frags - 1; i++) {
		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
		head = (struct page *)page_private(head);
	}
	/* the last page holds only d_off bytes */
	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
	skb_shinfo(skb)->nr_frags = new_frags;

release:
	skb_zcopy_clear(skb, false);
	return 0;
}
EXPORT_SYMBOL_GPL(skb_copy_ubufs);

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff.
The new one is not owned by a socket. Both * copies share the same packet data but not structure. The new * buffer has a reference count of 1. If the allocation fails the * function returns %NULL otherwise the new buffer is returned. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. */ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff_fclones *fclones = container_of(skb, struct sk_buff_fclones, skb1); struct sk_buff *n; if (skb_orphan_frags(skb, gfp_mask)) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; n->fclone = SKB_FCLONE_UNAVAILABLE; } return __skb_clone(n, skb); } EXPORT_SYMBOL(skb_clone); void skb_headers_offset_update(struct sk_buff *skb, int off) { /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += off; /* {transport,network,mac}_header and tail are relative to skb->head */ skb->transport_header += off; skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; skb->inner_mac_header += off; } EXPORT_SYMBOL(skb_headers_offset_update); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { if (skb_pfmemalloc(skb)) return SKB_ALLOC_RX; return 0; } /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff 
and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As by-product this function converts non-linear &sk_buff to linear
 *	one, so that &sk_buff becomes completely private and caller is allowed
 *	to modify all the data of returned buffer. This means that this
 *	function is not recommended for use in circumstances when only
 *	header is going to be modified. Use pskb_copy() instead.
 */
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
	int headerlen = skb_headroom(skb);
	/* Room for the whole original linear buffer plus all paged data. */
	unsigned int size = skb_end_offset(skb) + skb->data_len;
	struct sk_buff *n = __alloc_skb(size, gfp_mask,
					skb_alloc_rx_flag(skb), NUMA_NO_NODE);

	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* The negative offset copies the headroom bytes as well as the
	 * packet data, starting at n->head.
	 */
	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));

	skb_copy_header(n, skb);
	return n;
}
EXPORT_SYMBOL(skb_copy);

/**
 *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@headroom: headroom of new skb
 *	@gfp_mask: allocation priority
 *	@fclone: if true allocate the copy of the skb from the fclone
 *	cache instead of the head cache; it is recommended to set this
 *	to true for the cases where the copy will likely be cloned
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only header of &sk_buff and needs
 *	private copy of the header to alter. Returns %NULL on failure
 *	or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
				   gfp_t gfp_mask, bool fclone)
{
	/* Only the linear part is copied, so size it from skb_headlen(). */
	unsigned int size = skb_headlen(skb) + headroom;
	int flags = skb_alloc_rx_flag(skb) | (fclone ?
SKB_ALLOC_FCLONE : 0);
	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);

	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, headroom);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	skb_copy_from_linear_data(skb, n->data, n->len);

	/* Account for the paged data that the copy will share. */
	n->truesize += skb->data_len;
	n->data_len  = skb->data_len;
	n->len	     = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		if (skb_orphan_frags(skb, gfp_mask) ||
		    skb_zerocopy_clone(n, skb, gfp_mask)) {
			kfree_skb(n);
			n = NULL;
			goto out;
		}
		/* Share each page frag: copy the descriptor and take a
		 * reference on it.
		 */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			skb_frag_ref(skb, i);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_has_frag_list(skb)) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	skb_copy_header(n, skb);
out:
	return n;
}
EXPORT_SYMBOL(__pskb_copy_fclone);

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates identical copy, if @nhead and @ntail are zero)
 *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1. Returns zero in the case of success or error,
 *	if expansion failed. In the last case, &sk_buff is not changed.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
		     gfp_t gfp_mask)
{
	int i, osize = skb_end_offset(skb);
	int size = osize + nhead + ntail;
	long off;
	u8 *data;

	BUG_ON(nhead < 0);

	BUG_ON(skb_shared(skb));

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
		gfp_mask |= __GFP_MEMALLOC;
	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
			       gfp_mask, NUMA_NO_NODE, NULL);
	if (!data)
		goto nodata;
	/* Recompute @size from what ksize() reports for the allocation. */
	size = SKB_WITH_OVERHEAD(ksize(data));

	/* Copy only real data... and, alas, header.
This should be
	 * optimized for the cases when header is void.
	 */
	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);

	/* Copy the shared info up to and including the frags in use. */
	memcpy((struct skb_shared_info *)(data + size),
	       skb_shinfo(skb),
	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));

	/*
	 * if shinfo is shared we must drop the old head gracefully, but if it
	 * is not we can just drop the old head and let the existing refcount
	 * be since all we did is relocate the values
	 */
	if (skb_cloned(skb)) {
		if (skb_orphan_frags(skb, gfp_mask))
			goto nofrags;
		if (skb_zcopy(skb))
			refcount_inc(&skb_uarg(skb)->refcnt);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			skb_frag_ref(skb, i);

		if (skb_has_frag_list(skb))
			skb_clone_fraglist(skb);

		skb_release_data(skb);
	} else {
		skb_free_head(skb);
	}
	/* Distance by which the data moved, from the old head to the new. */
	off = (data + nhead) - skb->head;

	skb->head     = data;
	skb->head_frag = 0;
	skb->data    += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	/* end and tail are offsets from head here, so tail moves by @nhead. */
	skb->end      = size;
	off           = nhead;
#else
	skb->end      = skb->head + size;
#endif
	skb->tail	      += off;
	skb_headers_offset_update(skb, nhead);
	skb->cloned   = 0;
	skb->hdr_len  = 0;
	skb->nohdr    = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);

	skb_metadata_clear(skb);

	/* It is not generally safe to change skb->truesize.
	 * For the moment, we really care of rx path, or
	 * when skb is orphaned (not attached to a socket).
*/
	if (!skb->sk || skb->destructor == sock_edemux)
		skb->truesize += size - osize;

	return 0;

nofrags:
	/* The new buffer was never attached to the skb; just free it. */
	kfree(data);
nodata:
	return -ENOMEM;
}
EXPORT_SYMBOL(pskb_expand_head);

/* Make private copy of skb with writable head and some headroom.
 *
 * Returns the new skb, or NULL if the copy/clone or the head expansion
 * fails. All allocations use GFP_ATOMIC.
 */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	/* Enough headroom already: a header-private copy is sufficient. */
	if (delta <= 0)
		skb2 = pskb_copy(skb, GFP_ATOMIC);
	else {
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
					     GFP_ATOMIC)) {
			kfree_skb(skb2);
			skb2 = NULL;
		}
	}
	return skb2;
}
EXPORT_SYMBOL(skb_realloc_headroom);

/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom, int newtailroom,
				gfp_t gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
					gfp_mask, skb_alloc_rx_flag(skb),
					NUMA_NO_NODE);
	int oldheadroom = skb_headroom(skb);
	int head_copy_len, head_copy_off;

	if (!n)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy at most min(old, new) headroom bytes; when the new headroom
	 * is larger, the copied bytes are placed at its end.
	 */
	head_copy_len = oldheadroom;
	head_copy_off = 0;
	if (newheadroom <= head_copy_len)
		head_copy_len = newheadroom;
	else
		head_copy_off = newheadroom - head_copy_len;

	/* Copy the linear header and data.
*/
	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
			     skb->len + head_copy_len));

	skb_copy_header(n, skb);

	/* Header offsets are relative to head; shift them by the change in
	 * headroom.
	 */
	skb_headers_offset_update(n, newheadroom - oldheadroom);

	return n;
}
EXPORT_SYMBOL(skb_copy_expand);

/**
 *	__skb_pad		-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *	@free_on_error: free buffer on error
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return error in out of memory cases. The skb is freed on error
 *	if @free_on_error is true.
 */

int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
{
	int err;
	int ntail;

	/* If the skbuff is non linear tailroom is always zero.. */
	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
		memset(skb->data+skb->len, 0, pad);
		return 0;
	}

	/* Extra tail space needed to hold the paged data plus the padding. */
	ntail = skb->data_len + pad - (skb->end - skb->tail);
	if (likely(skb_cloned(skb) || ntail > 0)) {
		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
		if (unlikely(err))
			goto free_skb;
	}

	/* FIXME: The use of this function with non-linear skb's really needs
	 * to be audited.
	 */
	err = skb_linearize(skb);
	if (unlikely(err))
		goto free_skb;

	memset(skb->data + skb->len, 0, pad);
	return 0;

free_skb:
	if (free_on_error)
		kfree_skb(skb);
	return err;
}
EXPORT_SYMBOL(__skb_pad);

/**
 *	pskb_put - add data to the tail of a potentially fragmented buffer
 *	@skb: start of the buffer to use
 *	@tail: tail fragment of the buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the potentially
 *	fragmented buffer. @tail must be the last fragment of @skb -- or
 *	@skb itself. If this would exceed the total buffer size the kernel
 *	will panic. A pointer to the first byte of the extra data is
 *	returned.
*/ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; skb->len += len; } return skb_put(tail, len); } EXPORT_SYMBOL_GPL(pskb_put); /** * skb_put - add data to a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer. If this would * exceed the total buffer size the kernel will panic. A pointer to the * first byte of the extra data is returned. */ void *skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; if (unlikely(skb->tail > skb->end)) skb_over_panic(skb, len, __builtin_return_address(0)); return tmp; } EXPORT_SYMBOL(skb_put); /** * skb_push - add data to the start of a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer at the buffer * start. If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; if (unlikely(skb->data < skb->head)) skb_under_panic(skb, len, __builtin_return_address(0)); return skb->data; } EXPORT_SYMBOL(skb_push); /** * skb_pull - remove data from the start of a buffer * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the next data in the buffer * is returned. Once the data has been pulled future pushes will overwrite * the old data. */ void *skb_pull(struct sk_buff *skb, unsigned int len) { return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); /** * skb_trim - remove end from a buffer * @skb: buffer to alter * @len: new length * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. 
* The skb must be linear.
 */
void skb_trim(struct sk_buff *skb, unsigned int len)
{
	if (skb->len > len)
		__skb_trim(skb, len);
}
EXPORT_SYMBOL(skb_trim);

/* Trims skb to length len. It can change skb pointers.
 *
 * Returns 0 on success, the error from pskb_expand_head()/pskb_trim(), or
 * -ENOMEM if a shared frag_list member cannot be cloned.
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
	struct sk_buff **fragp;
	struct sk_buff *frag;
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;
	int err;

	if (skb_cloned(skb) &&
	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
		return err;

	i = 0;
	/* New length ends inside the linear part: drop every page frag. */
	if (offset >= len)
		goto drop_pages;

	for (; i < nfrags; i++) {
		int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (end < len) {
			offset = end;
			continue;
		}

		/* This frag contains the new end: shrink it and keep it. */
		skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);

drop_pages:
		skb_shinfo(skb)->nr_frags = i;

		for (; i < nfrags; i++)
			skb_frag_unref(skb, i);

		if (skb_has_frag_list(skb))
			skb_drop_fraglist(skb);
		goto done;
	}

	/* The new end lies beyond the page frags: walk the frag_list. */
	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
	     fragp = &frag->next) {
		int end = offset + frag->len;

		/* Replace a shared list member with a private clone before
		 * it can be trimmed or have its ->next changed.
		 */
		if (skb_shared(frag)) {
			struct sk_buff *nfrag;

			nfrag = skb_clone(frag, GFP_ATOMIC);
			if (unlikely(!nfrag))
				return -ENOMEM;

			nfrag->next = frag->next;
			consume_skb(frag);
			frag = nfrag;
			*fragp = frag;
		}

		if (end < len) {
			offset = end;
			continue;
		}

		if (end > len &&
		    unlikely((err = pskb_trim(frag, len - offset))))
			return err;

		if (frag->next)
			skb_drop_list(&frag->next);
		break;
	}

done:
	if (len > skb_headlen(skb)) {
		skb->data_len -= skb->len - len;
		skb->len       = len;
	} else {
		skb->len       = len;
		skb->data_len  = 0;
		skb_set_tail_pointer(skb, len);
	}

	if (!skb->sk || skb->destructor == sock_edemux)
		skb_condense(skb);
	return 0;
}
EXPORT_SYMBOL(___pskb_trim);

/* Note : use pskb_trim_rcsum() instead of calling this directly
 */
int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		int delta = skb->len - len;

		/* Subtract the checksum of the bytes being trimmed off. */
		skb->csum = csum_block_sub(skb->csum,
					   skb_checksum(skb, len, delta, 0),
					   len);
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		int hdlen = (len >
skb_headlen(skb)) ? skb_headlen(skb) : len;
		int offset = skb_checksum_start_offset(skb) + skb->csum_offset;

		/* Reject the trim if the checksum field would not fit in
		 * the first hdlen bytes.
		 */
		if (offset + sizeof(__sum16) > hdlen)
			return -EINVAL;
	}
	return __pskb_trim(skb, len);
}
EXPORT_SYMBOL(pskb_trim_rcsum_slow);

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes sense only on a fragmented &sk_buff,
 *	it expands header moving its tail forward and copying necessary
 *	data from fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and &sk_buff does not change) if pull failed
 *	or value of new tail of skb in the case of success.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
void *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	/* Copy the next @delta bytes of non-linear data to the tail. */
	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
			     skb_tail_pointer(skb), delta));

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_has_frag_list(skb))
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		/* The page frags alone cover @delta: frag_list untouched. */
		if (size >= eat)
			goto pull_pages;
		eat -= size;
	}

	/* If we need update frag list, we are in troubles.
* Certainly, it is possible to add an offset to skb data,
	 * but taking into account that pulling is expected to
	 * be very rare operation, it is worth to fight against
	 * further bloating skb head and crucify ourselves here instead.
	 * Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		/* First list member that survives the pull. */
		struct sk_buff *insp = NULL;

		do {
			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	/* Compact the frags array in place: @i reads, @k writes. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size <= eat) {
			/* Frag fully consumed: release it. */
			skb_frag_unref(skb, i);
			eat -= size;
		} else {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[k];

			*frag = skb_shinfo(skb)->frags[i];
			if (eat) {
				/* Frag partially consumed: skip its first
				 * @eat bytes.
				 */
				skb_frag_off_add(frag, eat);
				skb_frag_size_sub(frag, eat);
				/* Nothing was dropped before frag 0, so the
				 * array and nr_frags are already correct.
				 */
				if (!i)
					goto end;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

end:
	skb->tail     += delta;
	skb->data_len -= delta;

	if (!skb->data_len)
		skb_zcopy_clear(skb, false);

	return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);

/**
 *	skb_copy_bits - copy bits from skb to kernel buffer
 *	@skb: source skb
 *	@offset: offset in source
 *	@to: destination buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source skb to the
 *	destination buffer.
 *
 *	CAUTION !
:
 *		If its prototype is ever changed,
 *		check arch/{*}/net/{*}.S files,
 *		since it is called from BPF assembly code.
 *
 *	Returns 0 on success or -EFAULT if the requested range is not
 *	fully contained in the skb.
 */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	/* @start is the skb offset at which the current segment begins. */
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_from_linear_data_offset(skb, offset, to, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to     += copy;
	}

	/* Copy from the page frags. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(f);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			/* Each page of the frag is mapped and copied
			 * separately.
			 */
			skb_frag_foreach_page(f,
					      skb_frag_off(f) + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				memcpy(to + copied, vaddr + p_off, p_len);
				kunmap_atomic(vaddr);
			}

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	/* Copy from the frag_list, recursing into each member skb. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_bits(frag_iter, offset - start, to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_bits);

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	put_page(spd->pages[i]);
}

/* Copy up to *@len bytes at *@offset in @page into the socket's page frag.
 *
 * On success returns the page frag's page, with *@len clamped to the space
 * available in the frag and *@offset rewritten to the copy's location in
 * that page. Returns NULL if the page frag cannot be refilled.
 */
static struct page *linear_to_page(struct page *page, unsigned int *len,
				   unsigned int *offset,
				   struct sock *sk)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return NULL;

	*len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);

	memcpy(page_address(pfrag->page) + pfrag->offset,
	       page_address(page) + *offset, *len);
	*offset = pfrag->offset;
	pfrag->offset += *len;

	return pfrag->page;
}

/* True if @page/@offset directly continues the last entry already in @spd,
 * i.e. same page and @offset equals that entry's offset + len.
 */
static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
			     struct page *page,
			     unsigned int offset)
{
	return	spd->nr_pages &&
		spd->pages[spd->nr_pages - 1] == page &&
		(spd->partial[spd->nr_pages - 1].offset +
		 spd->partial[spd->nr_pages - 1].len == offset);
}

/*
 * Fill page/offset/length into spd, if it can hold more pages.
 *
 * Returns true when nothing was added (spd already holds MAX_SKB_FRAGS pages,
 * or linear_to_page() failed) and false when the data was recorded. For
 * @linear data, *@len may be reduced by linear_to_page().
 */
static bool spd_fill_page(struct splice_pipe_desc *spd,
			  struct pipe_inode_info *pipe, struct page *page,
			  unsigned int *len, unsigned int offset,
			  bool linear,
			  struct sock *sk)
{
	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
		return true;

	if (linear) {
		page = linear_to_page(page, len, &offset, sk);
		if (!page)
			return true;
	}
	/* Extend the previous entry instead of consuming a new slot. */
	if (spd_can_coalesce(spd, page, offset)) {
		spd->partial[spd->nr_pages - 1].len += *len;
		return false;
	}
	get_page(page);
	spd->pages[spd->nr_pages] = page;
	spd->partial[spd->nr_pages].len = *len;
	spd->partial[spd->nr_pages].offset = offset;
	spd->nr_pages++;

	return false;
}

/* Add the part of one segment (@page, @poff, @plen) that lies past *@off to
 * @spd, consuming from *@len. Returns true when the caller should stop
 * (nothing left to splice, or spd_fill_page() could not add more).
 */
static bool __splice_segment(struct page *page, unsigned int poff,
			     unsigned int plen, unsigned int *off,
			     unsigned int *len,
			     struct splice_pipe_desc *spd, bool linear,
			     struct sock *sk,
			     struct pipe_inode_info *pipe)
{
	if (!*len)
		return true;

	/* skip this segment if already processed */
	if (*off >= plen) {
		*off -= plen;
		return false;
	}

	/* ignore any bits we already processed */
	poff += *off;
	plen -= *off;
	*off = 0;

	do {
		unsigned int flen = min(*len, plen);

		if (spd_fill_page(spd, pipe, page, &flen, poff,
				  linear, sk))
			return true;
		poff
+= flen;
		plen -= flen;
		*len -= flen;
	} while (*len && plen);

	return false;
}

/*
 * Map linear and fragment data from the skb to spd. It reports true if the
 * pipe is full or if we already spliced the requested length.
 * *@offset and *@len are updated as data is skipped and consumed.
 */
static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
			      unsigned int *offset, unsigned int *len,
			      struct splice_pipe_desc *spd, struct sock *sk)
{
	int seg;
	struct sk_buff *iter;

	/* map the linear part :
	 * If skb->head_frag is set, this 'linear' part is backed by a
	 * fragment, and if the head is not shared with any clones then
	 * we can avoid a copy since we own the head portion of this page.
	 */
	if (__splice_segment(virt_to_page(skb->data),
			     (unsigned long) skb->data & (PAGE_SIZE - 1),
			     skb_headlen(skb),
			     offset, len, spd,
			     skb_head_is_locked(skb),
			     sk, pipe))
		return true;

	/*
	 * then map the fragments
	 */
	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

		if (__splice_segment(skb_frag_page(f),
				     skb_frag_off(f), skb_frag_size(f),
				     offset, len, spd, false, sk, pipe))
			return true;
	}

	/* finally recurse into the frag_list members */
	skb_walk_frags(skb, iter) {
		if (*offset >= iter->len) {
			*offset -= iter->len;
			continue;
		}
		/* __skb_splice_bits() only fails if the output has no room
		 * left, so no point in going over the frag_list for the error
		 * case.
		 */
		if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
			return true;
	}

	return false;
}

/*
 * Map data from the skb to a pipe. Should handle both the linear part,
 * the fragments, and the frag list.
*
 * Returns the result of splice_to_pipe() when at least one page was mapped,
 * otherwise 0.
 */
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
		    struct pipe_inode_info *pipe, unsigned int tlen,
		    unsigned int flags)
{
	struct partial_page partial[MAX_SKB_FRAGS];
	struct page *pages[MAX_SKB_FRAGS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages_max = MAX_SKB_FRAGS,
		.ops = &nosteal_pipe_buf_ops,
		.spd_release = sock_spd_release,
	};
	int ret = 0;

	/* The boolean result is not needed here; spd.nr_pages tells whether
	 * anything was mapped.
	 */
	__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);

	if (spd.nr_pages)
		ret = splice_to_pipe(pipe, &spd);

	return ret;
}
EXPORT_SYMBOL_GPL(skb_splice_bits);

/* Send skb data on a socket. Socket must be locked.
 *
 * Sends up to @len bytes of @skb starting at @offset, using non-blocking
 * (MSG_DONTWAIT) sends. Returns the number of bytes sent; if nothing was
 * sent before a failure, returns the failing call's result instead.
 */
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
			 int len)
{
	unsigned int orig_len = len;
	struct sk_buff *head = skb;
	unsigned short fragidx;
	int slen, ret;

do_frag_list:

	/* Deal with head data */
	while (offset < skb_headlen(skb) && len) {
		struct kvec kv;
		struct msghdr msg;

		slen = min_t(int, len, skb_headlen(skb) - offset);
		kv.iov_base = skb->data + offset;
		kv.iov_len = slen;
		memset(&msg, 0, sizeof(msg));
		msg.msg_flags = MSG_DONTWAIT;

		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
		if (ret <= 0)
			goto error;

		offset += ret;
		len -= ret;
	}

	/* All the data was skb head?
*/
	if (!len)
		goto out;

	/* Make offset relative to start of frags */
	offset -= skb_headlen(skb);

	/* Find where we are in frag list */
	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];

		if (offset < skb_frag_size(frag))
			break;

		offset -= skb_frag_size(frag);
	}

	/* Send the page frags, starting inside the one found above. */
	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];

		slen = min_t(size_t, len, skb_frag_size(frag) - offset);

		/* A send may be partial; loop until this frag's share is
		 * fully sent.
		 */
		while (slen) {
			ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
						     skb_frag_off(frag) + offset,
						     slen, MSG_DONTWAIT);
			if (ret <= 0)
				goto error;

			len -= ret;
			offset += ret;
			slen -= ret;
		}

		/* Subsequent frags are sent from their beginning. */
		offset = 0;
	}

	if (len) {
		/* Process any frag lists */

		/* From the head skb descend into its frag_list; from a
		 * list member advance to the next member.
		 */
		if (skb == head) {
			if (skb_has_frag_list(skb)) {
				skb = skb_shinfo(skb)->frag_list;
				goto do_frag_list;
			}
		} else if (skb->next) {
			skb = skb->next;
			goto do_frag_list;
		}
	}

out:
	return orig_len - len;

error:
	/* Report the error only if no data was sent at all. */
	return orig_len == len ? ret : orig_len - len;
}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);

/**
 *	skb_store_bits - store bits from kernel buffer to skb
 *	@skb: destination buffer
 *	@offset: offset in destination
 *	@from: source buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source buffer to the
 *	destination skb. This function handles all the messy bits of
 *	traversing fragment lists and such.
*
 *	Returns 0 on success or -EFAULT if the requested range is not
 *	fully contained in the skb.
 */

int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
	/* @start is the skb offset at which the current segment begins. */
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Store into the linear part. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_to_linear_data_offset(skb, offset, from, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from += copy;
	}

	/* Store into the page frags. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(frag,
					      skb_frag_off(frag) + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				memcpy(vaddr + p_off, from + copied, p_len);
				kunmap_atomic(vaddr);
			}

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}

	/* Store into the frag_list, recursing into each member skb. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_store_bits(frag_iter, offset - start,
					   from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_store_bits);

/* Checksum skb data.
 *
 * Folds @len bytes of @skb starting at @offset into @csum using the
 * update/combine callbacks in @ops, and returns the result.
 */
__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
		      __wsum csum, const struct skb_checksum_ops *ops)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	/* Number of bytes checksummed so far; passed to ops->combine. */
	int pos = 0;

	/* Checksum header.
*/
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
				       skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos	= copy;
	}

	/* Checksum the page frags. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			__wsum csum2;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			/* Each page is checksummed on its own from a zero
			 * seed, then combined at position @pos.
			 */
			skb_frag_foreach_page(frag,
					      skb_frag_off(frag) + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				csum2 = INDIRECT_CALL_1(ops->update,
							csum_partial_ext,
							vaddr + p_off, p_len, 0);
				kunmap_atomic(vaddr);
				csum = INDIRECT_CALL_1(ops->combine,
						       csum_block_add_ext, csum,
						       csum2, pos, p_len);
				pos += p_len;
			}

			if (!(len -= copy))
				return csum;
			offset += copy;
		}
		start = end;
	}

	/* Checksum the frag_list, recursing into each member skb. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			if (copy > len)
				copy = len;
			csum2 = __skb_checksum(frag_iter, offset - start,
					       copy, 0, ops);
			csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
					       csum, csum2, pos, copy);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			pos    += copy;
		}
		start = end;
	}
	/* Reaching here with bytes still outstanding is treated as a bug. */
	BUG_ON(len);

	return csum;
}
EXPORT_SYMBOL(__skb_checksum);

/* __skb_checksum() with csum_partial_ext as the update callback and
 * csum_block_add_ext as the combine callback.
 */
__wsum skb_checksum(const struct sk_buff *skb, int offset,
		    int len, __wsum csum)
{
	const struct skb_checksum_ops ops = {
		.update  = csum_partial_ext,
		.combine = csum_block_add_ext,
	};

	return __skb_checksum(skb, offset, len, csum, &ops);
}
EXPORT_SYMBOL(skb_checksum);

/* Both of above in one bottle. */

__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
				    u8 *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	/* Number of bytes processed so far; passed to csum_block_add(). */
	int pos = 0;
	__wsum csum = 0;

	/* Copy header.
*/
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to,
						 copy);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to     += copy;
		pos	= copy;
	}

	/* Copy and checksum the page frags. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			u32 p_off, p_len, copied;
			struct page *p;
			__wsum csum2;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(frag,
					      skb_frag_off(frag) + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
								  to + copied,
								  p_len);
				kunmap_atomic(vaddr);
				csum = csum_block_add(csum, csum2, pos);
				pos += p_len;
			}

			if (!(len -= copy))
				return csum;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	/* Copy and checksum the frag_list, recursing into each member. */
	skb_walk_frags(skb, frag_iter) {
		__wsum csum2;
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			csum2 = skb_copy_and_csum_bits(frag_iter,
						       offset - start,
						       to, copy);
			csum = csum_block_add(csum, csum2, pos);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			to     += copy;
			pos    += copy;
		}
		start = end;
	}
	/* Reaching here with bytes still outstanding is treated as a bug. */
	BUG_ON(len);
	return csum;
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);

/* Checksum the first @len bytes of @skb, seeded with skb->csum, and fold
 * the result. Returns the folded sum (zero means the checksum verified) and,
 * unless the skb is shared, records the outcome in skb->csum_valid.
 */
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	/* See comments in __skb_checksum_complete(). */
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev, skb);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

/* This function assumes skb->csum already holds pseudo header's checksum,
 * which has been changed from the hardware checksum, for example, by
 * __skb_checksum_validate_complete().
And, the original skb->csum must * have been validated unsuccessfully for CHECKSUM_COMPLETE case. * * It returns non-zero if the recomputed checksum is still invalid, otherwise * zero. The new checksum is stored back into skb->csum unless the skb is * shared. */ __sum16 __skb_checksum_complete(struct sk_buff *skb) { __wsum csum; __sum16 sum; csum = skb_checksum(skb, 0, skb->len, 0); sum = csum_fold(csum_add(skb->csum, csum)); /* This check is inverted, because we already knew the hardware * checksum is invalid before calling this function. So, if the * re-computed checksum is valid instead, then we have a mismatch * between the original skb->csum and skb_checksum(). This means either * the original hardware checksum is incorrect or we screw up skb->csum * when moving skb->data around. */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) { /* Save full packet checksum */ skb->csum = csum; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum_complete_sw = 1; skb->csum_valid = !sum; } return sum; } EXPORT_SYMBOL(__skb_checksum_complete); static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static const struct skb_checksum_ops default_crc32c_ops = { .update = warn_crc32c_csum_update, .combine = warn_crc32c_csum_combine, }; const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = &default_crc32c_ops; EXPORT_SYMBOL(crc32c_csum_stub); /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer * * Calculates the amount of linear headroom needed in the 'to' skb passed * into skb_zerocopy(). 
*/ unsigned int skb_zerocopy_headlen(const struct sk_buff *from) { unsigned int hlen = 0; if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); if (!hlen) hlen = from->len; } if (skb_has_frag_list(from)) hlen = from->len; return hlen; } EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * * Copies up to `len` bytes from `from` to `to` by creating references * to the frags in the source buffer. * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. * * Return value: * 0: everything is OK * -ENOMEM: couldn't orphan frags of @from due to lack of memory * -EFAULT: skb_copy_bits() found some problem with skb geometry */ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ if (len <= skb_tailroom(to)) return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); if (unlikely(ret)) return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); __skb_fill_page_desc(to, 0, page, offset, plen); get_page(page); j = 1; len -= plen; } } to->truesize += len + plen; to->len += len + plen; to->data_len += len + plen; if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); return -ENOMEM; } skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { int size; if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; size = 
min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), len); skb_frag_size_set(&skb_shinfo(to)->frags[j], size); len -= size; skb_frag_ref(to, j); j++; } skb_shinfo(to)->nr_frags = j; return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb_checksum_start_offset(skb); else csstart = skb_headlen(skb); BUG_ON(csstart > skb_headlen(skb)); skb_copy_from_linear_data(skb, to, csstart); csum = 0; if (csstart != skb->len) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart); if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum_offset; *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } EXPORT_SYMBOL(skb_copy_and_csum_dev); /** * skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The list lock is taken so the function * may be used safely with other locking list functions. The head item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue); /** * skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. The list lock is taken so the function * may be used safely with other locking list functions. The tail item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue_tail(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue_tail); /** * skb_queue_purge - empty a list * @list: list to empty * * Delete all buffers on an &sk_buff list. 
Each buffer is removed from * the list and one reference dropped. This function takes the list * lock and is atomic with respect to other list locking functions. */ void skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = skb_dequeue(list)) != NULL) kfree_skb(skb); } EXPORT_SYMBOL(skb_queue_purge); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ unsigned int skb_rbtree_purge(struct rb_root *root) { struct rb_node *p = rb_first(root); unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); sum += skb->truesize; kfree_skb(skb); } return sum; } /** * skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_head(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_head); /** * skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the tail of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. 
*/ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_tail); /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove * @list: list to use * * Remove a packet from a list. The list locks are taken and this * function is atomic with respect to other list locked calls * * You must know what list the SKB is on. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_unlink(skb, list); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_unlink); /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_append); static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) { int i; skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), pos - len); /* And move data appendix as is. 
*/ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->data_len = skb->data_len; skb1->len += skb1->data_len; skb->data_len = 0; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void skb_split_no_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, int pos) { int i, k = 0; const int nfrags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->len = skb1->data_len = skb->len - len; skb->len = len; skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < len) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_ref(skb, i); skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; } else skb_shinfo(skb)->nr_frags++; pos += size; } skb_shinfo(skb1)->nr_frags = k; } /** * skb_split - Split fragmented skb to two parts at length len. * @skb: the buffer to split * @skb1: the buffer to receive the second part * @len: new length for skb */ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ skb_split_no_header(skb, skb1, len, pos); } EXPORT_SYMBOL(skb_split); /* Shifting from/to a cloned skb is a no-go. 
*
 * Caller cannot keep skb_shinfo related pointers past calling here!
 *
 * Returns 0 when @skb is not cloned, otherwise the result of
 * pskb_expand_head().
 */
static int skb_prepare_for_shift(struct sk_buff *skb)
{
	int ret = 0;

	if (skb_cloned(skb)) {
		/* Save and restore truesize: pskb_expand_head() may reallocate
		 * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
		 * cannot change truesize at this point.
		 */
		unsigned int save_truesize = skb->truesize;

		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		skb->truesize = save_truesize;
	}
	return ret;
}

/**
 * skb_shift - Shifts paged data partially from skb to another
 * @tgt: buffer into which tail data gets added
 * @skb: buffer from which the paged data comes from
 * @shiftlen: shift up to this many bytes
 *
 * Attempts to shift up to shiftlen worth of bytes, which may be less than
 * the length of the skb, from skb to tgt. Returns number bytes shifted.
 * It's up to caller to free skb if everything was shifted.
 *
 * If @tgt runs out of frags, the whole operation is aborted.
 *
 * Skb cannot include anything else but paged data while tgt is allowed
 * to have non-paged data as well.
 *
 * As implemented, the return value is either @shiftlen or 0; 0 is returned
 * on every path that gives up before modifying the frag arrays.
 *
 * TODO: full sized shift could be optimized but that would need
 * specialized skb free'er to handle frags without up-to-date nr_frags.
 */
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
{
	/* from/to: frag indexes into skb and tgt; merge: index of the tgt
	 * frag the first skb frag is coalesced into, or -1; todo: bytes
	 * still to be placed into new tgt frag slots.
	 */
	int from, to, merge, todo;
	skb_frag_t *fragfrom, *fragto;

	BUG_ON(shiftlen > skb->len);

	/* Source carries linear data: nothing is shifted. */
	if (skb_headlen(skb))
		return 0;
	/* Nothing is shifted if skb_zcopy() is set on either buffer. */
	if (skb_zcopy(tgt) || skb_zcopy(skb))
		return 0;

	todo = shiftlen;
	from = 0;
	to = skb_shinfo(tgt)->nr_frags;
	fragfrom = &skb_shinfo(skb)->frags[from];

	/* Actual merge is delayed until the point when we know we can
	 * commit all, so that we don't have to undo partial changes
	 */
	if (!to ||
	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
			      skb_frag_off(fragfrom))) {
		merge = -1;
	} else {
		merge = to - 1;

		todo -= skb_frag_size(fragfrom);
		if (todo < 0) {
			/* shiftlen is smaller than the first source frag:
			 * only sizes and the source offset are adjusted,
			 * no frag slot changes hands.
			 */
			if (skb_prepare_for_shift(skb) ||
			    skb_prepare_for_shift(tgt))
				return 0;

			/* All previous frag pointers might be stale! */
			fragfrom = &skb_shinfo(skb)->frags[from];
			fragto = &skb_shinfo(tgt)->frags[merge];

			skb_frag_size_add(fragto, shiftlen);
			skb_frag_size_sub(fragfrom, shiftlen);
			skb_frag_off_add(fragfrom, shiftlen);

			goto onlymerged;
		}

		/* First source frag is merged as a whole later on. */
		from++;
	}

	/* Skip full, not-fitting skb to avoid expensive operations */
	if ((shiftlen == skb->len) &&
	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
		return 0;

	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
		return 0;

	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
		/* tgt has no free frag slot left: abort without committing
		 * (tgt's nr_frags has not been updated yet).
		 */
		if (to == MAX_SKB_FRAGS)
			return 0;

		fragfrom = &skb_shinfo(skb)->frags[from];
		fragto = &skb_shinfo(tgt)->frags[to];

		if (todo >= skb_frag_size(fragfrom)) {
			/* Whole frag moves over to tgt. */
			*fragto = *fragfrom;
			todo -= skb_frag_size(fragfrom);
			from++;
			to++;

		} else {
			/* Partial frag: both skbs end up referencing the
			 * page, hence the extra reference.
			 */
			__skb_frag_ref(fragfrom);
			skb_frag_page_copy(fragto, fragfrom);
			skb_frag_off_copy(fragto, fragfrom);
			skb_frag_size_set(fragto, todo);

			skb_frag_off_add(fragfrom, todo);
			skb_frag_size_sub(fragfrom, todo);
			todo = 0;

			to++;
			break;
		}
	}

	/* Ready to "commit" this state change to tgt */
	skb_shinfo(tgt)->nr_frags = to;

	if (merge >= 0) {
		/* Perform the delayed merge of the first source frag. */
		fragfrom = &skb_shinfo(skb)->frags[0];
		fragto = &skb_shinfo(tgt)->frags[merge];

		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
		__skb_frag_unref(fragfrom);
	}

	/* Reposition in the original skb */
	to = 0;
	while (from < skb_shinfo(skb)->nr_frags)
		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
	skb_shinfo(skb)->nr_frags = to;

	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);

onlymerged:
	/* Most likely the tgt won't ever need its checksum anymore, skb on
	 * the other hand might need it if it needs to be resent
	 */
	tgt->ip_summed = CHECKSUM_PARTIAL;
	skb->ip_summed = CHECKSUM_PARTIAL;

	/* Yak, is it really working this way? Some helper please? */
	skb->len -= shiftlen;
	skb->data_len -= shiftlen;
	skb->truesize -= shiftlen;
	tgt->len += shiftlen;
	tgt->data_len += shiftlen;
	tgt->truesize += shiftlen;

	return shiftlen;
}

/**
 * skb_prepare_seq_read - Prepare a sequential read of skb data
 * @skb: the buffer to read
 * @from: lower offset of data to be read
 * @to: upper offset of data to be read
 * @st: state variable
 *
 * Initializes the specified state variable. Must be called before
 * invoking skb_seq_read() for the first time.
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st)
{
	st->lower_offset = from;
	st->upper_offset = to;
	/* Reading starts at @skb itself, at its first frag, nothing mapped. */
	st->root_skb = st->cur_skb = skb;
	st->frag_idx = st->stepped_offset = 0;
	st->frag_data = NULL;
}
EXPORT_SYMBOL(skb_prepare_seq_read);

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note 1: The size of each block of data returned can be arbitrary,
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non linear data.
 *
 * Note 2: Fragment lists within fragments are not implemented
 *       at the moment, state->root_skb could be replaced with
 *       a stack for this purpose.
*/
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int abs_offset = consumed + st->lower_offset;
	unsigned int block_limit;
	skb_frag_t *frag;

	/* Requested position is at or past the upper bound: finished. */
	if (unlikely(abs_offset >= st->upper_offset)) {
		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}
		return 0;
	}

	/* One iteration per skb: the root skb first, then its frag_list. */
	for (;;) {
		struct sk_buff *cur = st->cur_skb;

		/* Linear area of the current skb. */
		block_limit = skb_headlen(cur) + st->stepped_offset;

		if (abs_offset < block_limit && !st->frag_data) {
			*data = cur->data + (abs_offset - st->stepped_offset);
			return block_limit - abs_offset;
		}

		/* Step over the linear area once, before the first frag. */
		if (st->frag_idx == 0 && !st->frag_data)
			st->stepped_offset += skb_headlen(cur);

		/* Page frags of the current skb. */
		while (st->frag_idx < skb_shinfo(cur)->nr_frags) {
			frag = &skb_shinfo(cur)->frags[st->frag_idx];
			block_limit = skb_frag_size(frag) + st->stepped_offset;

			if (abs_offset < block_limit) {
				/* Keep the mapping until the frag is done. */
				if (!st->frag_data)
					st->frag_data = kmap_atomic(skb_frag_page(frag));

				*data = (u8 *) st->frag_data + skb_frag_off(frag) +
					(abs_offset - st->stepped_offset);

				return block_limit - abs_offset;
			}

			if (st->frag_data) {
				kunmap_atomic(st->frag_data);
				st->frag_data = NULL;
			}

			st->frag_idx++;
			st->stepped_offset += skb_frag_size(frag);
		}

		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}

		/* Move on to the next skb, if there is one. */
		if (st->root_skb == cur && skb_has_frag_list(st->root_skb))
			st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		else if (cur->next)
			st->cur_skb = cur->next;
		else
			return 0;

		st->frag_idx = 0;
	}
}
EXPORT_SYMBOL(skb_seq_read);

/**
 * skb_abort_seq_read - Abort a sequential read of skb data
 * @st: state variable
 *
 * Must be called if skb_seq_read() was not called until it
 * returned 0.
*/ void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, struct ts_config *conf, struct ts_state *state) { return skb_seq_read(offset, text, TS_SKB_CB(state)); } static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) { skb_abort_seq_read(TS_SKB_CB(state)); } /** * skb_find_text - Find a text pattern in skb data * @skb: the buffer to look in * @from: search offset * @to: search limit * @config: textsearch configuration * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve * subsequent occurrences of the pattern. Returns the offset * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config) { struct ts_state state; unsigned int ret; config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); ret = textsearch_find(config, &state); return (ret <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size) { int i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc(skb, i, page, offset, size); } else { return -EMSGSIZE; } return 0; } EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_pull on the packet and updates * the CHECKSUM_COMPLETE checksum. 
It should be used on
 *	receive path processing instead of skb_pull unless you know
 *	that the checksum difference is zero (e.g., a valid IP header)
 *	or you are setting ip_summed to CHECKSUM_NONE.
 *
 *	Returns the new skb->data pointer.
 */
void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
	/* Remember where the pulled bytes start, for the checksum update. */
	unsigned char *data = skb->data;

	BUG_ON(len > skb->len);
	__skb_pull(skb, len);
	skb_postpull_rcsum(skb, data, len);
	return skb->data;
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);

/*
 * Build a frag descriptor covering the linear data of @frag_skb: the page is
 * the head page of frag_skb->head, the offset is that of frag_skb->data
 * within it and the size is the head length. The descriptor is returned by
 * value; no page reference is taken in this helper.
 */
static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
{
	skb_frag_t head_frag;
	struct page *page;

	page = virt_to_head_page(frag_skb->head);
	__skb_frag_set_page(&head_frag, page);
	skb_frag_off_set(&head_frag, frag_skb->data -
			 (unsigned char *)page_address(page));
	skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
	return head_frag;
}

/*
 * Turn the frag_list of @skb into a ->next chained list of skbs headed by
 * @skb itself. Each list member gets the header state of @skb and a copy of
 * the @offset bytes (plus tunnel header) in front of the payload. Length and
 * truesize of the members are subtracted from @skb.
 *
 * On success @skb is returned with an extra reference (skb_get()) and
 * skb->prev pointing to the last member. On any failure the skbs already
 * chained behind @skb are freed and ERR_PTR(-ENOMEM) is returned.
 *
 * NOTE(review): the loop dereferences the frag_list head unconditionally,
 * so callers presumably guarantee a non-empty frag_list — confirm.
 */
struct sk_buff *skb_segment_list(struct sk_buff *skb,
				 netdev_features_t features,
				 unsigned int offset)
{
	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
	unsigned int tnl_hlen = skb_tnl_header_len(skb);
	unsigned int delta_truesize = 0;
	unsigned int delta_len = 0;
	struct sk_buff *tail = NULL;
	struct sk_buff *nskb, *tmp;
	int err;

	skb_push(skb, -skb_network_offset(skb) + offset);

	/* The members are chained via ->next from here on. */
	skb_shinfo(skb)->frag_list = NULL;

	do {
		nskb = list_skb;
		list_skb = list_skb->next;

		err = 0;
		if (skb_shared(nskb)) {
			/* Replace a shared member with a private clone. */
			tmp = skb_clone(nskb, GFP_ATOMIC);
			if (tmp) {
				consume_skb(nskb);
				nskb = tmp;
				err = skb_unclone(nskb, GFP_ATOMIC);
			} else {
				err = -ENOMEM;
			}
		}

		if (!tail)
			skb->next = nskb;
		else
			tail->next = nskb;

		if (unlikely(err)) {
			/* Re-attach the unprocessed rest so that the error
			 * path frees the complete list.
			 */
			nskb->next = list_skb;
			goto err_linearize;
		}

		tail = nskb;

		delta_len += nskb->len;
		delta_truesize += nskb->truesize;

		skb_push(nskb, -skb_network_offset(nskb) + offset);

		skb_release_head_state(nskb);
		__copy_skb_header(nskb, skb);

		/* Header offsets were copied from skb; correct them by the
		 * headroom difference between the two buffers.
		 */
		skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
						 nskb->data - tnl_hlen,
						 offset + tnl_hlen);

		if (skb_needs_linearize(nskb, features) &&
		    __skb_linearize(nskb))
			goto err_linearize;

	} while (list_skb);

	/* The members are accounted on their own now. */
	skb->truesize = skb->truesize - delta_truesize;
	skb->data_len = skb->data_len - delta_len;
	skb->len = skb->len - delta_len;

	skb_gso_reset(skb);

	skb->prev = tail;

	if (skb_needs_linearize(skb, features) &&
	    __skb_linearize(skb))
		goto err_linearize;

	skb_get(skb);

	return skb;

err_linearize:
	/* Every failure is reported as -ENOMEM, whatever @err held. */
	kfree_skb_list(skb->next);
	skb->next = NULL;
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(skb_segment_list);

/*
 * Chain @skb behind the skbs already aggregated on @p: it becomes the
 * frag_list of @p if nothing was chained before, otherwise the ->next of the
 * last chained skb. @skb is pulled by its GRO offset first, and its length
 * and truesize are added to @p.
 *
 * Returns 0 on success or -E2BIG if the combined length would reach 65536.
 */
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
{
	if (unlikely(p->len + skb->len >= 65536))
		return -E2BIG;

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;

	skb_pull(skb, skb_gro_offset(skb));

	NAPI_GRO_CB(p)->last = skb;
	NAPI_GRO_CB(p)->count++;
	/* skb->len is read after the pull above, i.e. without the headers. */
	p->data_len += skb->len;
	p->truesize += skb->truesize;
	p->len += skb->len;

	NAPI_GRO_CB(skb)->same_flow = 1;

	return 0;
}

/**
 *	skb_segment - Perform protocol segmentation on skb.
 *	@head_skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function performs segmentation on the given skb.  It returns
 *	a pointer to the first in a list of new skbs for the segments.
 *	In case of error it returns ERR_PTR(err).
*/ struct sk_buff *skb_segment(struct sk_buff *head_skb, netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; __be16 proto; bool csum, sg; int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { /* gso_size is untrusted, and we have a frag_list with a linear * non head_frag head. * * (we assume checking the first list_skb member suffices; * i.e if either of the list_skb members have non head_frag * head, then the first one has too). * * If head_skb's headlen does not fit requested gso_size, it * means that the frag_list members do NOT terminate on exact * gso_size boundaries. Hence we cannot perform skb_frag_t page * sharing. Therefore we must fallback to copying the frag_list * skbs; we do so by disabling SG. */ if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) features &= ~NETIF_F_SG; } __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); if (sg && csum && (mss != GSO_BY_FRAGS)) { if (!(features & NETIF_F_GSO_PARTIAL)) { struct sk_buff *iter; unsigned int frag_len; if (!list_skb || !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) goto normal; /* If we get here then all the required * GSO features except frag_list are supported. * Try to split the SKB to multiple GSO SKBs * with no frag_list. 
* Currently we can do that only when the buffers don't * have a linear part and all the buffers except * the last are of the same length. */ frag_len = list_skb->len; skb_walk_frags(head_skb, iter) { if (frag_len != iter->len && iter->next) goto normal; if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; } if (len != frag_len) goto normal; } /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. */ partial_segs = len / mss; if (partial_segs > 1) mss *= partial_segs; else partial_segs = 0; } normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); do { struct sk_buff *nskb; skb_frag_t *nskb_frag; int hsize; int size; if (unlikely(mss == GSO_BY_FRAGS)) { len = list_skb->len; } else { len = head_skb->len - offset; if (len > mss) len = mss; } hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; if (!hsize && i >= nfrags && skb_headlen(list_skb) && (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; frag++; } nskb = skb_clone(list_skb, GFP_ATOMIC); list_skb = list_skb->next; if (unlikely(!nskb)) goto err; if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; } hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) goto err; skb_reserve(nskb, headroom); __skb_put(nskb, doffset); } if (segs) tail->next = nskb; else segs = nskb; tail 
= nskb; __copy_skb_header(nskb, head_skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { if (!csum) { if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { skb_copy_bits(head_skb, offset, skb_put(nskb, len), len); } continue; } nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; if (!skb_headlen(list_skb)) { BUG_ON(!nfrags); } else { BUG_ON(!list_skb->head_frag); /* to make room for head_frag. */ i--; frag--; } if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= MAX_SKB_FRAGS)) { net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); err = -EINVAL; goto err; } *nskb_frag = (i < 0) ? 
skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } nskb_frag++; } skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; perform_csum_check: if (!csum) { if (skb_has_shared_frag(nskb) && __skb_linearize(nskb)) goto err; if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); /* Some callers want to get the end of the list. * Put it in segs->prev to avoid walking the list. * (see validate_xmit_skb_list() for example) */ segs->prev = tail; if (partial_segs) { struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ for (iter = segs; iter; iter = iter->next) { skb_shinfo(iter)->gso_size = gso_size; skb_shinfo(iter)->gso_segs = partial_segs; skb_shinfo(iter)->gso_type = type; SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; } if (tail->len - doffset <= gso_size) skb_shinfo(tail)->gso_size = 0; else if (tail != segs) skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. 
*/
	if (head_skb->destructor == sock_wfree) {
		swap(tail->truesize, head_skb->truesize);
		swap(tail->destructor, head_skb->destructor);
		swap(tail->sk, head_skb->sk);
	}
	return segs;

err:
	kfree_skb_list(segs);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(skb_segment);

/**
 * skb_gro_receive - merge the payload of @skb into the GRO packet @p
 * @p: packet being aggregated; NAPI_GRO_CB(p)->last is the skb that
 *     currently receives new fragments
 * @skb: newly received skb whose data past skb_gro_offset() is appended
 *
 * Three strategies are tried in order:
 *  1. the payload lies entirely in page frags (headlen <= offset): the frag
 *     descriptors are moved into the last skb of @p;
 *  2. the linear head is itself a page fragment (skb->head_frag): the head
 *     is described as one extra frag, followed by a copy of @skb's frags;
 *  3. otherwise, or when the frag array would exceed MAX_SKB_FRAGS, @skb is
 *     chained onto @p's frag_list ("merge").
 *
 * Returns 0 on success, or -E2BIG when the combined length would reach
 * 65536 bytes or NAPI_GRO_CB(skb)->flush is set.
 */
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
{
	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);
	unsigned int len = skb_gro_len(skb);
	unsigned int delta_truesize;
	struct sk_buff *lp;

	if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
		return -E2BIG;

	lp = NAPI_GRO_CB(p)->last;
	pinfo = skb_shinfo(lp);

	if (headlen <= offset) {
		skb_frag_t *frag;
		skb_frag_t *frag2;
		int i = skbinfo->nr_frags;
		int nr_frags = pinfo->nr_frags + i;

		if (nr_frags > MAX_SKB_FRAGS)
			goto merge;

		/* From here on, offset is relative to the first page frag. */
		offset -= headlen;
		pinfo->nr_frags = nr_frags;
		skbinfo->nr_frags = 0;

		/* Copy the descriptors back to front; when the loop ends,
		 * frag points at the first copied descriptor.
		 * NOTE(review): the do/while body runs at least once, so this
		 * path relies on skbinfo->nr_frags being non-zero - confirm.
		 */
		frag = pinfo->frags + nr_frags;
		frag2 = skbinfo->frags + i;
		do {
			*--frag = *--frag2;
		} while (--i);

		/* Skip the already-consumed bytes of the first copied frag. */
		skb_frag_off_add(frag, offset);
		skb_frag_size_sub(frag, offset);

		/* all fragments truesize : remove (head size + sk_buff) */
		delta_truesize = skb->truesize -
				 SKB_TRUESIZE(skb_end_offset(skb));

		skb->truesize -= skb->data_len;
		skb->len -= skb->data_len;
		skb->data_len = 0;

		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
		goto done;
	} else if (skb->head_frag) {
		int nr_frags = pinfo->nr_frags;
		skb_frag_t *frag = pinfo->frags + nr_frags;
		struct page *page = virt_to_head_page(skb->head);
		unsigned int first_size = headlen - offset;
		unsigned int first_offset;

		/* One extra slot is needed for the head itself. */
		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
			goto merge;

		first_offset = skb->data -
			       (unsigned char *)page_address(page) +
			       offset;

		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;

		/* Describe the unread part of the linear head as a frag. */
		__skb_frag_set_page(frag, page);
		skb_frag_off_set(frag, first_offset);
		skb_frag_size_set(frag, first_size);

		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
		/* We don't need to clear skbinfo->nr_frags here */

		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
		goto done;
	}

merge:
	/* Fallback: keep skb whole and link it into p's frag_list. */
	delta_truesize = skb->truesize;
	if (offset > headlen) {
		/* The GRO offset reaches into the first frag: trim it there. */
		unsigned int eat = offset - headlen;

		skb_frag_off_add(&skbinfo->frags[0], eat);
		skb_frag_size_sub(&skbinfo->frags[0], eat);
		skb->data_len -= eat;
		skb->len -= eat;
		offset = headlen;
	}

	__skb_pull(skb, offset);

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;
	NAPI_GRO_CB(p)->last = skb;
	__skb_header_release(skb);
	/* Setting lp = p makes the "lp != p" accounting below a no-op. */
	lp = p;

done:
	NAPI_GRO_CB(p)->count++;
	p->data_len += len;
	p->truesize += delta_truesize;
	p->len += len;
	if (lp != p) {
		/* Frags were added to a frag_list member: account there too. */
		lp->data_len += len;
		lp->truesize += delta_truesize;
		lp->len += len;
	}
	NAPI_GRO_CB(skb)->same_flow = 1;
	return 0;
}

#ifdef CONFIG_SKB_EXTENSIONS
/* Extension sizes are expressed in chunks of SKB_EXT_ALIGN_VALUE bytes. */
#define SKB_EXT_ALIGN_VALUE	8
/* Size of x in SKB_EXT_ALIGN_VALUE-byte chunks, rounded up. */
#define SKB_EXT_CHUNKSIZEOF(x)	(ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)

/* Per-extension size in chunks, indexed by extension id; only the
 * extensions enabled in the kernel configuration get an entry.
 */
static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
#endif
#ifdef CONFIG_XFRM
	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
#endif
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
#endif
#if IS_ENABLED(CONFIG_MPTCP)
	[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
#endif
#if IS_ENABLED(CONFIG_KCOV)
	[SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64),
#endif
};

/* Total size, in chunks, of struct skb_ext plus every enabled extension.
 * The trailing "0" terminates the '+' chain whichever #if blocks are active.
 */
static __always_inline unsigned int skb_ext_total_length(void)
{
	return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
		skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
#endif
#ifdef CONFIG_XFRM
		skb_ext_type_len[SKB_EXT_SEC_PATH] +
#endif
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		skb_ext_type_len[TC_SKB_EXT] +
#endif
#if IS_ENABLED(CONFIG_MPTCP)
		skb_ext_type_len[SKB_EXT_MPTCP] +
#endif
#if IS_ENABLED(CONFIG_KCOV)
		skb_ext_type_len[SKB_EXT_KCOV_HANDLE] +
#endif
		0;
}

/* Create the slab cache for skb extensions, sized to hold all enabled
 * extensions at once. The build fails if there are 8 or more extension
 * ids or if the total exceeds 255 chunks.
 */
static void skb_extensions_init(void)
{
	BUILD_BUG_ON(SKB_EXT_NUM >= 8);
	BUILD_BUG_ON(skb_ext_total_length() > 255);

	skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
					     SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
					     0,
					     SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					     NULL);
}
#else
/* No extensions configured: nothing to set up. */
static void skb_extensions_init(void) {}
#endif

/* Boot-time creation of the sk_buff slab caches. The head cache is created
 * with a usercopy region limited to the cb[] field of struct sk_buff.
 */
void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					      offsetof(struct sk_buff, cb),
					      sizeof_field(struct sk_buff, cb),
					      NULL);
	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						sizeof(struct sk_buff_fclones),
						0,
						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
						NULL);
	skb_extensions_init();
}

/* Map @len bytes of @skb starting at @offset into @sg: first the linear
 * area, then the page frags, then (recursively) each skb on the frag list.
 *
 * Returns the number of scatterlist entries filled, or -EMSGSIZE when the
 * recursion depth reaches 24 or the previously filled entry is already
 * marked as the last one of @sg. BUG()s if @skb holds fewer than @len bytes
 * past @offset.
 */
static int
__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
	       unsigned int recursion_level)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int elt = 0;

	if (unlikely(recursion_level >= 24))
		return -EMSGSIZE;

	/* Linear part: always goes into sg[0] when it overlaps the range. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		sg_set_buf(sg, skb->data + offset, copy);
		elt++;
		if ((len -= copy) == 0)
			return elt;
		offset += copy;
	}

	/* Page frags: [start, end) is the byte range of frag i within skb. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			/* No room left in the caller's table. */
			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				return -EMSGSIZE;

			if (copy > len)
				copy = len;
			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
				    skb_frag_off(frag) + offset - start);
			elt++;
			if (!(len -= copy))
				return elt;
			offset += copy;
		}
		start = end;
	}

	/* Frag list: recurse with the offset made relative to each member. */
	skb_walk_frags(skb, frag_iter) {
		int end, ret;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				return -EMSGSIZE;

			if (copy > len)
				copy = len;
			ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
					      copy, recursion_level + 1);
			if (unlikely(ret < 0))
				return ret;
			elt += ret;
			if ((len -= copy) == 0)
				return elt;
			offset += copy;
		}
		start = end;
	}
	BUG_ON(len);
	return elt;
}

/**
 *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
 *	@skb: Socket buffer containing the buffers to be mapped
 *	@sg: The scatter-gather list to map into
 *	@offset: The offset into the buffer's contents to start mapping
 *	@len: Length of buffer space to be mapped
 *
 *	Fill the specified scatter-gather list with mappings/pointers into a
 *	region of the buffer space attached to a socket buffer. Returns either
 *	the number of scatterlist items used, or -EMSGSIZE if the contents
 *	could not fit. The last entry used is marked as the end of the list;
 *	nothing is marked when the result is zero or negative.
 */
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
	int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);

	if (nsg <= 0)
		return nsg;

	sg_mark_end(&sg[nsg - 1]);

	return nsg;
}
EXPORT_SYMBOL_GPL(skb_to_sgvec);

/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
 * given sglist without marking the sg entry that contains the last skb data
 * as the end. So the caller can manipulate the sg list at will when adding
 * new data after the first call, without calling sg_unmark_end to extend the
 * sg list.
 *
 * Scenario to use skb_to_sgvec_nomark:
 * 1. sg_init_table
 * 2. skb_to_sgvec_nomark(payload1)
 * 3. skb_to_sgvec_nomark(payload2)
 *
 * This is equivalent to:
 * 1. sg_init_table
 * 2. skb_to_sgvec(payload1)
 * 3. sg_unmark_end
 * 4. skb_to_sgvec(payload2)
 *
 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
 * is preferable.
 */
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
			int offset, int len)
{
	return __skb_to_sgvec(skb, sg, offset, len, 0);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);

/**
 *	skb_cow_data - Check that a socket buffer's data buffers are writable
 *	@skb: The socket buffer to check.
 *	@tailbits: Amount of trailing space to be added
 *	@trailer: Returned pointer to the skb where the @tailbits space begins
 *
 *	Make sure that the data buffers attached to a socket buffer are
 *	writable. If they are not, private copies are made of the data buffers
 *	and the socket buffer is set to use these instead.
 *
 *	If @tailbits is given, make sure that there is space to write @tailbits
 *	bytes of data beyond current end of socket buffer.  @trailer will be
 *	set to point to the skb in which this space begins.
 *
 *	The number of scatterlist elements required to completely map the
 *	COW'd and extended socket buffer will be returned, or -ENOMEM if a
 *	required reallocation or copy fails.
 */
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
{
	int copyflag;
	int elt;
	struct sk_buff *skb1, **skb_p;

	/* If skb is cloned or its head is paged, reallocate
	 * head pulling out all the pages (pages are considered not writable
	 * at the moment even if they are anonymous).
	 */
	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
	    !__pskb_pull_tail(skb, __skb_pagelen(skb)))
		return -ENOMEM;

	/* Easy case. Most of packets will go this way. */
	if (!skb_has_frag_list(skb)) {
		/* A little of trouble, not enough of space for trailer.
		 * This should not happen, when stack is tuned to generate
		 * good frames. OK, on miss we reallocate and reserve even more
		 * space, 128 bytes is fair. */

		if (skb_tailroom(skb) < tailbits &&
		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
			return -ENOMEM;

		/* Voila! */
		*trailer = skb;
		return 1;
	}

	/* Misery. We are in troubles, going to mincer fragments... */

	/* elt starts at 1 to count the head skb itself. */
	elt = 1;
	skb_p = &skb_shinfo(skb)->frag_list;
	copyflag = 0;

	while ((skb1 = *skb_p) != NULL) {
		int ntail = 0;

		/* The fragment is partially pulled by someone,
		 * this can happen on input. Copy it and everything
		 * after it. */

		if (skb_shared(skb1))
			copyflag = 1;

		/* If the skb is the last, worry about trailer. */

		if (skb1->next == NULL && tailbits) {
			if (skb_shinfo(skb1)->nr_frags ||
			    skb_has_frag_list(skb1) ||
			    skb_tailroom(skb1) < tailbits)
				ntail = tailbits + 128;
		}

		if (copyflag ||
		    skb_cloned(skb1) ||
		    ntail ||
		    skb_shinfo(skb1)->nr_frags ||
		    skb_has_frag_list(skb1)) {
			struct sk_buff *skb2;

			/* Slow path: replace this fragment with a private
			 * copy, expanded by ntail bytes when tail room is
			 * needed.
			 */
			if (ntail == 0)
				skb2 = skb_copy(skb1, GFP_ATOMIC);
			else
				skb2 = skb_copy_expand(skb1,
						       skb_headroom(skb1),
						       ntail,
						       GFP_ATOMIC);
			if (unlikely(skb2 == NULL))
				return -ENOMEM;

			if (skb1->sk)
				skb_set_owner_w(skb2, skb1->sk);

			/* Looking around. Are we still alive?
			 * OK, link new skb, drop old one */

			skb2->next = skb1->next;
			*skb_p = skb2;
			kfree_skb(skb1);
			skb1 = skb2;
		}
		elt++;
		*trailer = skb1;
		skb_p = &skb1->next;
	}

	return elt;
}
EXPORT_SYMBOL_GPL(skb_cow_data);

/* skb destructor installed by sock_queue_err_skb(): gives the skb's truesize
 * back to the owning socket's sk_rmem_alloc counter.
 */
static void sock_rmem_free(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}

/* Tag @skb as belonging to a socket error queue. */
static void skb_set_err_queue(struct sk_buff *skb)
{
	/* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
	 * So, it is safe to (mis)use it to mark skbs on the error queue.
	 */
	skb->pkt_type = PACKET_OUTGOING;
	BUILD_BUG_ON(PACKET_OUTGOING == 0);
}

/*
 * Queue @skb on @sk's error queue and notify the socket.
 *
 * Returns 0 on success. Returns -ENOMEM, leaving @skb untouched and still
 * owned by the caller, when sk_rmem_alloc plus the skb's truesize would
 * reach sk_rcvbuf.
 *
 * Note: We don't mem charge error packets (no sk_forward_alloc changes)
 */
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned int)READ_ONCE(sk->sk_rcvbuf))
		return -ENOMEM;

	/* Re-own the skb: charge it to sk's receive memory. */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = sock_rmem_free;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	skb_set_err_queue(skb);

	/* before exiting rcu section, make sure dst is refcounted */
	skb_dst_force(skb);

	skb_queue_tail(&sk->sk_error_queue, skb);
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);
	return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);

/* True when @skb is non-NULL and its extended error originated from ICMP
 * or ICMPv6.
 */
static bool is_icmp_err_skb(const struct sk_buff *skb)
{
	return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
		       SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
}

/* Remove and return the first skb on @sk's error queue (the result is
 * checked for NULL below, so the queue may be empty).
 *
 * sk->sk_err is kept in step with the queue: if the next queued skb is an
 * ICMP error its errno is stored; if the dequeued skb was an ICMP error and
 * the next one is not, sk_err is cleared. The socket is notified again when
 * more entries remain.
 */
struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
	struct sk_buff_head *q = &sk->sk_error_queue;
	struct sk_buff *skb, *skb_next = NULL;
	bool icmp_next = false;
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	skb = __skb_dequeue(q);
	if (skb && (skb_next = skb_peek(q))) {
		icmp_next = is_icmp_err_skb(skb_next);
		if (icmp_next)
			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
	}
	spin_unlock_irqrestore(&q->lock, flags);

	if (is_icmp_err_skb(skb) && !icmp_next)
		sk->sk_err = 0;

	if (skb_next)
		sk->sk_error_report(sk);

	return skb;
}
EXPORT_SYMBOL(sock_dequeue_err_skb);

/**
 * skb_clone_sk - create clone of skb, and take reference to socket
 * @skb: the skb to clone
 *
 * This function creates a clone of a buffer that holds a reference on
 * sk_refcnt.  Buffers created via this function are meant to be
 * returned using sock_queue_err_skb, or free via kfree_skb.
 *
 * When passing buffers allocated with this function to sock_queue_err_skb
 * it is necessary to wrap the call with sock_hold/sock_put in order to
 * prevent the socket from being released prior to being enqueued on
 * the sk_error_queue.
*/
struct sk_buff *skb_clone_sk(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct sk_buff *clone;

	/* No socket, or its refcount already dropped to zero: nothing to do. */
	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
		return NULL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone) {
		/* Give back the reference taken above. */
		sock_put(sk);
		return NULL;
	}

	clone->sk = sk;
	clone->destructor = sock_efree;

	return clone;
}
EXPORT_SYMBOL(skb_clone_sk);

/* Fill in the extended error block of @skb as a SO_EE_ORIGIN_TIMESTAMPING
 * report of type @tstype and queue it on @sk's error queue. The skb is
 * consumed: it is freed here if queueing fails.
 */
static void __skb_complete_tx_timestamp(struct sk_buff *skb,
					struct sock *sk,
					int tstype,
					bool opt_stats)
{
	struct sock_exterr_skb *serr;
	int err;

	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
	serr->ee.ee_info = tstype;
	serr->opt_stats = opt_stats;
	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
		serr->ee.ee_data = skb_shinfo(skb)->tskey;
		/* For TCP stream sockets the key is reported relative to
		 * sk->sk_tskey.
		 */
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			serr->ee.ee_data -= sk->sk_tskey;
	}

	err = sock_queue_err_skb(sk, skb);

	if (err)
		kfree_skb(skb);
}

/* Decide whether a tx timestamp may be delivered to @sk. Always allowed when
 * sysctl_tstamp_allow_data is set or only the timestamp is returned
 * (@tsonly); otherwise the socket's file must hold CAP_NET_RAW in
 * init_user_ns, which is checked under sk_callback_lock.
 */
static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
	bool ret;

	if (likely(sysctl_tstamp_allow_data || tsonly))
		return true;

	read_lock_bh(&sk->sk_callback_lock);
	ret = sk->sk_socket && sk->sk_socket->file &&
	      file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
	read_unlock_bh(&sk->sk_callback_lock);
	return ret;
}

/* Attach @hwtstamps to @skb and deliver it to the error queue of skb->sk as
 * an SCM_TSTAMP_SND report. @skb is always consumed: it is either queued or
 * freed.
 */
void skb_complete_tx_timestamp(struct sk_buff *skb,
			       struct skb_shared_hwtstamps *hwtstamps)
{
	struct sock *sk = skb->sk;

	if (!skb_may_tx_timestamp(sk, false))
		goto err;

	/* Take a reference to prevent skb_orphan() from freeing the socket,
	 * but only if the socket refcount is not zero.
	 */
	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
		*skb_hwtstamps(skb) = *hwtstamps;
		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
		sock_put(sk);
		return;
	}

err:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);

/* Generate a tx timestamp report of type @tstype for @orig_skb and queue it
 * on @sk's error queue. @orig_skb itself is not consumed: the report is an
 * empty skb (or TCP opt-stats skb) when SOF_TIMESTAMPING_OPT_TSONLY is set,
 * otherwise a clone of @orig_skb.
 *
 * When @hwtstamps is NULL a software timestamp is taken from
 * ktime_get_real(). Silently does nothing when @sk is NULL, delivery is not
 * permitted, or allocation fails.
 */
void __skb_tstamp_tx(struct sk_buff *orig_skb,
		     struct skb_shared_hwtstamps *hwtstamps,
		     struct sock *sk, int tstype)
{
	struct sk_buff *skb;
	bool tsonly, opt_stats = false;

	if (!sk)
		return;

	/* Skip the software timestamp when the skb is flagged
	 * SKBTX_IN_PROGRESS, unless the socket asked for both via
	 * SOF_TIMESTAMPING_OPT_TX_SWHW.
	 */
	if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
	    skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
		return;

	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
	if (!skb_may_tx_timestamp(sk, tsonly))
		return;

	if (tsonly) {
#ifdef CONFIG_INET
		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
		    sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM) {
			skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
			opt_stats = true;
		} else
#endif
			skb = alloc_skb(0, GFP_ATOMIC);
	} else {
		skb = skb_clone(orig_skb, GFP_ATOMIC);
	}
	if (!skb)
		return;

	if (tsonly) {
		/* The fresh skb carries no payload; copy over the timestamp
		 * flags and key from the original.
		 */
		skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags &
					     SKBTX_ANY_TSTAMP;
		skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
	}

	if (hwtstamps)
		*skb_hwtstamps(skb) = *hwtstamps;
	else
		skb->tstamp = ktime_get_real();

	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);

/* Convenience wrapper: report an SCM_TSTAMP_SND timestamp for @orig_skb to
 * the socket that owns it.
 */
void skb_tstamp_tx(struct sk_buff *orig_skb,
		   struct skb_shared_hwtstamps *hwtstamps)
{
	return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
			       SCM_TSTAMP_SND);
}
EXPORT_SYMBOL_GPL(skb_tstamp_tx);

/* Record the wifi ack status in @skb and deliver it to the error queue of
 * skb->sk as a SO_EE_ORIGIN_TXSTATUS report. @skb is always consumed.
 */
void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
	struct sock *sk = skb->sk;
	struct sock_exterr_skb *serr;
	/* Stays non-zero (so the skb is freed) if no socket ref can be taken. */
	int err = 1;

	skb->wifi_acked_valid = 1;
	skb->wifi_acked = acked;

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;

	/* Take a reference to prevent skb_orphan() from freeing the socket,
	 * but only if the socket refcount is not zero.
	 */
	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
		err = sock_queue_err_skb(sk, skb);
		sock_put(sk);
	}
	if (err)
		kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);

/**
 * skb_partial_csum_set - set up and verify partial csum values for packet
 * @skb: the skb to set
 * @start: the number of bytes after skb->data to start checksumming.
 * @off: the offset from start to place the checksum.
 *
 * For untrusted partially-checksummed packets, we need to make sure the values
 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
 *
 * This function checks and sets those values and skb->ip_summed: if this
 * returns false you should drop the packet.
 */
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
	/* End of the 16-bit checksum field, relative to skb->data. */
	u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
	/* csum_start is stored relative to the buffer head, hence headroom. */
	u32 csum_start = skb_headroom(skb) + (u32)start;

	/* csum_start must fit in 16 bits and the checksum field must lie
	 * within the linear area.
	 */
	if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) {
		net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
				     start, off, skb_headroom(skb), skb_headlen(skb));
		return false;
	}
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = csum_start;
	skb->csum_offset = off;
	skb_set_transport_header(skb, start);
	return true;
}
EXPORT_SYMBOL_GPL(skb_partial_csum_set);

/* Make sure at least @len bytes of @skb are in the linear area, pulling up
 * to min(@max, skb->len) bytes in one go if a pull is needed.
 *
 * Returns 0 on success, -ENOMEM if the pull fails, or -EPROTO if the packet
 * is too short to provide @len linear bytes.
 */
static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
			       unsigned int max)
{
	if (skb_headlen(skb) >= len)
		return 0;

	/* If we need to pullup then pullup to the max, so we
	 * won't need to do it again.
	 */
	if (max > skb->len)
		max = skb->len;

	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
		return -ENOMEM;

	if (skb_headlen(skb) < len)
		return -EPROTO;

	return 0;
}

/* 15 * 4 bytes; presumably the largest TCP header (15 32-bit words). */
#define MAX_TCP_HDR_LEN (15 * 4)

/* Set up partial checksumming for the TCP or UDP header located @off bytes
 * into @skb.
 *
 * Returns a pointer to the header's checksum field, or an ERR_PTR():
 * the error from skb_maybe_pull_tail(), or -EPROTO when the partial
 * checksum values are rejected or @proto is neither TCP nor UDP.
 */
static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
				      typeof(IPPROTO_IP) proto,
				      unsigned int off)
{
	int err;

	switch (proto) {
	case IPPROTO_TCP:
		err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
					  off + MAX_TCP_HDR_LEN);
		if (!err && !skb_partial_csum_set(skb, off,
						  offsetof(struct tcphdr,
							   check)))
			err = -EPROTO;
		return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;

	case IPPROTO_UDP:
		err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
					  off + sizeof(struct udphdr));
		if (!err && !skb_partial_csum_set(skb, off,
						  offsetof(struct udphdr,
							   check)))
			err = -EPROTO;
		return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
	}

	return ERR_PTR(-EPROTO);
}

/* This value should be large enough to cover a tagged ethernet header plus
 * maximally sized IP and TCP or UDP headers.
 */
#define MAX_IP_HDR_LEN 128

/* IPv4 half of skb_checksum_setup(): locate the transport header, set up
 * the partial checksum and, if @recalculate, store the complemented
 * pseudo-header checksum in the transport checksum field.
 *
 * Returns 0 on success, -EPROTO for fragments and unsupported protocols,
 * or the error from pulling the headers.
 */
static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
{
	unsigned int off;
	bool fragment;
	__sum16 *csum;
	int err;

	fragment = false;

	err = skb_maybe_pull_tail(skb,
				  sizeof(struct iphdr),
				  MAX_IP_HDR_LEN);
	if (err < 0)
		goto out;

	if (ip_is_fragment(ip_hdr(skb)))
		fragment = true;

	off = ip_hdrlen(skb);

	err = -EPROTO;

	if (fragment)
		goto out;

	csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
	if (IS_ERR(csum))
		return PTR_ERR(csum);

	if (recalculate)
		*csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
					   ip_hdr(skb)->daddr,
					   skb->len - off,
					   ip_hdr(skb)->protocol, 0);
	err = 0;

out:
	return err;
}

/* This value should be large enough to cover a tagged ethernet header plus
 * an IPv6 header, all options, and a maximal TCP or UDP header.
*/
#define MAX_IPV6_HDR_LEN 256

/* Pointer of the given header type, @off bytes past the network header. */
#define OPT_HDR(type, skb, off) \
	(type *)(skb_network_header(skb) + (off))

/* IPv6 half of skb_checksum_setup(): walk the extension header chain to
 * find the transport header, set up the partial checksum and, if
 * @recalculate, store the complemented pseudo-header checksum in the
 * transport checksum field.
 *
 * Returns 0 on success, -EPROTO for fragments, for a header chain that runs
 * past the payload length, and for unsupported protocols, or the error from
 * pulling the headers.
 */
static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
{
	int err;
	u8 nexthdr;
	unsigned int off;
	unsigned int len;
	bool fragment;
	bool done;
	__sum16 *csum;

	fragment = false;
	done = false;

	off = sizeof(struct ipv6hdr);

	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
	if (err < 0)
		goto out;

	nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Walk extension headers until a non-extension header is found
	 * (done) or off runs past the length claimed by the IPv6 header.
	 */
	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
	while (off <= len && !done) {
		switch (nexthdr) {
		case IPPROTO_DSTOPTS:
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING: {
			struct ipv6_opt_hdr *hp;

			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct ipv6_opt_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
			nexthdr = hp->nexthdr;
			off += ipv6_optlen(hp);
			break;
		}
		case IPPROTO_AH: {
			struct ip_auth_hdr *hp;

			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct ip_auth_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
			nexthdr = hp->nexthdr;
			off += ipv6_authlen(hp);
			break;
		}
		case IPPROTO_FRAGMENT: {
			struct frag_hdr *hp;

			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct frag_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct frag_hdr, skb, off);

			/* A non-zero offset or the more-fragments bit marks
			 * a real fragment.
			 */
			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
				fragment = true;

			nexthdr = hp->nexthdr;
			off += sizeof(struct frag_hdr);
			break;
		}
		default:
			done = true;
			break;
		}
	}

	err = -EPROTO;

	if (!done || fragment)
		goto out;

	csum = skb_checksum_setup_ip(skb, nexthdr, off);
	if (IS_ERR(csum))
		return PTR_ERR(csum);

	if (recalculate)
		*csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					 &ipv6_hdr(skb)->daddr,
					 skb->len - off, nexthdr, 0);
	err = 0;

out:
	return err;
}

/**
 * skb_checksum_setup - set up partial checksum offset
 * @skb: the skb to set up
 * @recalculate: if true the pseudo-header checksum will be recalculated
 *
 * Dispatches on skb->protocol to the IPv4 or IPv6 helper. Returns 0 on
 * success or a negative errno; any other protocol yields -EPROTO.
 */
int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
{
	int err;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		err = skb_checksum_setup_ipv4(skb, recalculate);
		break;

	case htons(ETH_P_IPV6):
		err = skb_checksum_setup_ipv6(skb, recalculate);
		break;

	default:
		err = -EPROTO;
		break;
	}

	return err;
}
EXPORT_SYMBOL(skb_checksum_setup);

/**
 * skb_checksum_maybe_trim - maybe trims the given skb
 * @skb: the skb to check
 * @transport_len: the data length beyond the network header
 *
 * Checks whether the given skb has data beyond the given transport length.
 * If so, returns a cloned skb trimmed to this transport length.
 * Otherwise returns the provided skb. Returns NULL in error cases
 * (e.g. transport_len exceeds skb length or out-of-memory).
 *
 * Caller needs to set the skb transport header and free any returned skb if it
 * differs from the provided skb.
 */
static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
					       unsigned int transport_len)
{
	struct sk_buff *skb_chk;
	/* Expected total length: headers up to the transport header plus
	 * the transport data.
	 */
	unsigned int len = skb_transport_offset(skb) + transport_len;
	int ret;

	if (skb->len < len)
		return NULL;
	else if (skb->len == len)
		return skb;

	/* Trailing data present: trim a clone, leaving @skb itself alone. */
	skb_chk = skb_clone(skb, GFP_ATOMIC);
	if (!skb_chk)
		return NULL;

	ret = pskb_trim_rcsum(skb_chk, len);
	if (ret) {
		kfree_skb(skb_chk);
		return NULL;
	}

	return skb_chk;
}

/**
 * skb_checksum_trimmed - validate checksum of an skb
 * @skb: the skb to check
 * @transport_len: the data length beyond the network header
 * @skb_chkf: checksum function to use
 *
 * Applies the given checksum function skb_chkf to the provided skb.
 * Returns a checked and maybe trimmed skb. Returns NULL on error.
 *
 * If the skb has data beyond the given transport length, then a
 * trimmed & cloned skb is checked and returned.
 *
 * Caller needs to set the skb transport header and free any returned skb if it
 * differs from the provided skb.
*/
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
				     unsigned int transport_len,
				     __sum16(*skb_chkf)(struct sk_buff *skb))
{
	struct sk_buff *skb_chk;
	unsigned int offset = skb_transport_offset(skb);
	__sum16 ret;

	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
	if (!skb_chk)
		goto err;

	if (!pskb_may_pull(skb_chk, offset))
		goto err;

	/* Run the check with skb->data at the transport header, then
	 * restore the original data pointer.
	 */
	skb_pull_rcsum(skb_chk, offset);
	ret = skb_chkf(skb_chk);
	skb_push_rcsum(skb_chk, offset);

	/* A non-zero result from skb_chkf is treated as a failure. */
	if (ret)
		goto err;

	return skb_chk;

err:
	/* Only free a clone made here; the caller still owns @skb. */
	if (skb_chk && skb_chk != skb)
		kfree_skb(skb_chk);

	return NULL;

}
EXPORT_SYMBOL(skb_checksum_trimmed);

/* Emit a rate-limited warning naming the device of @skb. */
void __skb_warn_lro_forwarding(const struct sk_buff *skb)
{
	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
			     skb->dev->name);
}
EXPORT_SYMBOL(__skb_warn_lro_forwarding);

/* Free @skb. When @head_stolen is true only the head state is released and
 * the struct sk_buff is returned to skbuff_head_cache; otherwise the skb is
 * freed through __kfree_skb().
 */
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
	if (head_stolen) {
		skb_release_head_state(skb);
		kmem_cache_free(skbuff_head_cache, skb);
	} else {
		__kfree_skb(skb);
	}
}
EXPORT_SYMBOL(kfree_skb_partial);

/**
 * skb_try_coalesce - try to merge skb to prior one
 * @to: prior buffer
 * @from: buffer to add
 * @fragstolen: pointer to boolean, set to true when the linear head of
 *              @from was attached to @to as a page fragment
 * @delta_truesize: how much more was allocated than was requested
 *
 * Returns true when the data of @from has been merged into @to, false when
 * the two buffers cannot be coalesced (in which case @to is unchanged).
 */
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
		      bool *fragstolen, int *delta_truesize)
{
	struct skb_shared_info *to_shinfo, *from_shinfo;
	int i, delta, len = from->len;

	*fragstolen = false;

	if (skb_cloned(to))
		return false;

	/* Everything fits in the tailroom of @to: plain copy. */
	if (len <= skb_tailroom(to)) {
		if (len)
			BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
		*delta_truesize = 0;
		return true;
	}

	to_shinfo = skb_shinfo(to);
	from_shinfo = skb_shinfo(from);
	if (to_shinfo->frag_list || from_shinfo->frag_list)
		return false;
	if (skb_zcopy(to) || skb_zcopy(from))
		return false;

	if (skb_headlen(from) != 0) {
		struct page *page;
		unsigned int offset;

		/* ">=" rather than ">": one extra frag slot is needed for
		 * the head of @from.
		 */
		if (to_shinfo->nr_frags +
		    from_shinfo->nr_frags >= MAX_SKB_FRAGS)
			return false;

		if (skb_head_is_locked(from))
			return false;

		delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));

		page = virt_to_head_page(from->head);
		offset = from->data - (unsigned char *)page_address(page);

		skb_fill_page_desc(to, to_shinfo->nr_frags,
				   page, offset, skb_headlen(from));
		*fragstolen = true;
	} else {
		if (to_shinfo->nr_frags +
		    from_shinfo->nr_frags > MAX_SKB_FRAGS)
			return false;

		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
	}

	WARN_ON_ONCE(delta < len);

	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
	       from_shinfo->frags,
	       from_shinfo->nr_frags * sizeof(skb_frag_t));
	to_shinfo->nr_frags += from_shinfo->nr_frags;

	if (!skb_cloned(from))
		from_shinfo->nr_frags = 0;

	/* if the skb is not cloned this does nothing
	 * since we set nr_frags to 0.
	 */
	for (i = 0; i < from_shinfo->nr_frags; i++)
		__skb_frag_ref(&from_shinfo->frags[i]);

	to->truesize += delta;
	to->len += len;
	to->data_len += len;

	*delta_truesize = delta;
	return true;
}
EXPORT_SYMBOL(skb_try_coalesce);

/**
 * skb_scrub_packet - scrub an skb
 *
 * @skb: buffer to clean
 * @xnet: packet is crossing netns
 *
 * skb_scrub_packet can be used after encapsulating or decapsulating a packet
 * into/from a tunnel. Some information has to be cleared during these
 * operations.
 * skb_scrub_packet can also be used to clean a skb before injecting it in
 * another namespace (@xnet == true). We have to clear all information in the
 * skb that could impact namespace isolation.
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
	/* State that is always reset, whether or not netns is crossed. */
	skb->pkt_type = PACKET_HOST;
	skb->skb_iif = 0;
	skb->ignore_df = 0;
	skb_dst_drop(skb);
	skb_ext_reset(skb);
	nf_reset_ct(skb);
	nf_reset_trace(skb);

#ifdef CONFIG_NET_SWITCHDEV
	skb->offload_fwd_mark = 0;
	skb->offload_l3_fwd_mark = 0;
#endif

	if (!xnet)
		return;

	/* Additional state cleared only when crossing namespaces. */
	ipvs_reset(skb);
	skb->mark = 0;
	skb->tstamp = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);

/**
 * skb_gso_transport_seglen - Return length of individual segments of a gso packet
 *
 * @skb: GSO skb
 *
 * skb_gso_transport_seglen is used to determine the real size of the
 * individual segments, including Layer4 headers (TCP/UDP).
 *
 * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
 */
static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int thlen = 0;

	if (skb->encapsulation) {
		/* Everything between the outer and the inner transport
		 * header counts as header length.
		 */
		thlen = skb_inner_transport_header(skb) -
			skb_transport_header(skb);

		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
			thlen += inner_tcp_hdrlen(skb);
	} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
		thlen = tcp_hdrlen(skb);
	} else if (unlikely(skb_is_gso_sctp(skb))) {
		thlen = sizeof(struct sctphdr);
	} else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
		thlen = sizeof(struct udphdr);
	}
	/* UFO sets gso_size to the size of the fragmentation
	 * payload, i.e. the size of the L4 (UDP) header is already
	 * accounted for.
	 */
	return thlen + shinfo->gso_size;
}

/**
 * skb_gso_network_seglen - Return length of individual segments of a gso packet
 *
 * @skb: GSO skb
 *
 * skb_gso_network_seglen is used to determine the real size of the
 * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
 *
 * The MAC/L2 header is not accounted for.
*/
static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
{
	/* Length of the network header(s) preceding the transport header. */
	unsigned int hdr_len = skb_transport_header(skb) -
			       skb_network_header(skb);

	return hdr_len + skb_gso_transport_seglen(skb);
}

/**
 * skb_gso_mac_seglen - Return length of individual segments of a gso packet
 *
 * @skb: GSO skb
 *
 * skb_gso_mac_seglen is used to determine the real size of the
 * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
 * headers (TCP/UDP).
 */
static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
{
	/* Length of everything from the MAC header to the transport header. */
	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

	return hdr_len + skb_gso_transport_seglen(skb);
}

/**
 * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
 *
 * There are a couple of instances where we have a GSO skb, and we
 * want to determine what size it would be after it is segmented.
 *
 * We might want to check:
 * -    L3+L4+payload size (e.g. IP forwarding)
 * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
 *
 * This is a helper to do that correctly considering GSO_BY_FRAGS.
 *
 * @skb: GSO skb
 *
 * @seg_len: The segmented length (from skb_gso_*_seglen). In the
 *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
 *
 * @max_len: The maximum permissible length.
 *
 * Returns true if the segmented length <= max length.
 */
static inline bool skb_gso_size_check(const struct sk_buff *skb,
				      unsigned int seg_len,
				      unsigned int max_len) {
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	const struct sk_buff *iter;

	if (shinfo->gso_size != GSO_BY_FRAGS)
		return seg_len <= max_len;

	/* Undo this so we can re-use header sizes */
	seg_len -= GSO_BY_FRAGS;

	/* With GSO_BY_FRAGS each frag_list member is checked on its own:
	 * header sizes plus that member's linear length.
	 */
	skb_walk_frags(skb, iter) {
		if (seg_len + skb_headlen(iter) > max_len)
			return false;
	}

	return true;
}

/**
 * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
 *
 * @skb: GSO skb
 * @mtu: MTU to validate against
 *
 * skb_gso_validate_network_len validates if a given skb will fit a
 * wanted MTU once split. It considers L3 headers, L4 headers, and the
 * payload.
 */
bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
{
	return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
}
EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);

/**
 * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
 *
 * @skb: GSO skb
 * @len: length to validate against
 *
 * skb_gso_validate_mac_len validates if a given skb will fit a wanted
 * length once split, including L2, L3 and L4 headers and the payload.
 */
bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
{
	return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
}
EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);

/* Close the VLAN_HLEN-byte gap left after a VLAN header has been pulled:
 * shift the start of the MAC header (and any skb metadata) forward by
 * VLAN_HLEN and advance skb->mac_header accordingly.
 *
 * Returns @skb, or NULL after freeing it when skb_cow() fails.
 */
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
	int mac_len, meta_len;
	void *meta;

	if (skb_cow(skb, skb_headroom(skb)) < 0) {
		kfree_skb(skb);
		return NULL;
	}

	mac_len = skb->data - skb_mac_header(skb);
	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
		/* Move everything in front of the removed tag, except the
		 * trailing ETH_TLEN bytes, forward by VLAN_HLEN.
		 */
		memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
			mac_len - VLAN_HLEN - ETH_TLEN);
	}

	meta_len = skb_metadata_len(skb);
	if (meta_len) {
		meta = skb_metadata_end(skb) - meta_len;
		memmove(meta + VLAN_HLEN, meta, meta_len);
	}

	skb->mac_header += VLAN_HLEN;
	return skb;
}

/* Move an in-payload VLAN tag found at skb->data into the skb's hwaccel tag
 * fields, then strip it from the packet data.
 *
 * Returns the (possibly different) skb on success. An skb that already
 * carries a hwaccel tag is returned unchanged. Returns NULL on failure,
 * in which case the skb has been freed.
 */
struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
{
	struct vlan_hdr *vhdr;
	u16 vlan_tci;

	if (unlikely(skb_vlan_tag_present(skb))) {
		/* vlan_tci is already set-up so leave this for another time */
		return skb;
	}

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		goto err_free;
	/* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
		goto err_free;

	vhdr = (struct vlan_hdr *)skb->data;
	vlan_tci = ntohs(vhdr->h_vlan_TCI);
	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);

	skb_pull_rcsum(skb, VLAN_HLEN);
	vlan_set_encap_proto(skb, vhdr);

	skb = skb_reorder_vlan_header(skb);
	if (unlikely(!skb))
		goto err_free;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	return skb;

err_free:
	/* skb is NULL here on the share-check and reorder failure paths. */
	kfree_skb(skb);
	return NULL;
}
EXPORT_SYMBOL(skb_vlan_untag);

/* Make sure the first @write_len bytes of @skb are linear and may be
 * written to.
 *
 * Returns 0 on success, -ENOMEM if the bytes cannot be pulled, or the
 * result of pskb_expand_head() when the head has to be reallocated.
 */
int skb_ensure_writable(struct sk_buff *skb, int write_len)
{
	if (!pskb_may_pull(skb, write_len))
		return -ENOMEM;

	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
		return 0;

	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
EXPORT_SYMBOL(skb_ensure_writable);

/* remove VLAN header from packet and update csum accordingly.
 * expects a non skb_vlan_tag_present skb with a vlan tag payload
 *
 * The removed tag's TCI is stored in *@vlan_tci (host byte order).
 * Returns 0 on success, -EINVAL when skb->data is not at the mac header,
 * or the error from skb_ensure_writable().
 */
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
	struct vlan_hdr *vhdr;
	int offset = skb->data - skb_mac_header(skb);
	int err;

	if (WARN_ONCE(offset,
		      "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
		      offset)) {
		return -EINVAL;
	}

	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
	if (unlikely(err))
		return err;

	/* Drop the VLAN_HLEN bytes following the two MAC addresses from
	 * the checksum.
	 */
	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);

	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
	*vlan_tci = ntohs(vhdr->h_vlan_TCI);

	/* Slide the MAC addresses forward over the tag, then pull. */
	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	__skb_pull(skb, VLAN_HLEN);

	vlan_set_encap_proto(skb, vhdr);
	skb->mac_header += VLAN_HLEN;

	if (skb_network_offset(skb) < ETH_HLEN)
		skb_set_network_header(skb, ETH_HLEN);

	skb_reset_mac_len(skb);

	return err;
}
EXPORT_SYMBOL(__skb_vlan_pop);

/* Pop a vlan tag either from hwaccel or from payload.
 * Expects skb->data at mac header.
*/
int skb_vlan_pop(struct sk_buff *skb)
{
	u16 vlan_tci;
	__be16 vlan_proto;
	int err;

	if (likely(skb_vlan_tag_present(skb))) {
		/* The outermost tag lives in the hwaccel fields: drop it. */
		__vlan_hwaccel_clear_tag(skb);
	} else {
		/* No hwaccel tag and no VLAN ethertype: nothing to pop. */
		if (unlikely(!eth_type_vlan(skb->protocol)))
			return 0;

		err = __skb_vlan_pop(skb, &vlan_tci);
		if (err)
			return err;
	}
	/* move next vlan tag to hw accel tag */
	if (likely(!eth_type_vlan(skb->protocol)))
		return 0;

	/* Remember the protocol before __skb_vlan_pop() rewrites it. */
	vlan_proto = skb->protocol;
	err = __skb_vlan_pop(skb, &vlan_tci);
	if (unlikely(err))
		return err;

	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
	return 0;
}
EXPORT_SYMBOL(skb_vlan_pop);

/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
 * Expects skb->data at mac header.
 *
 * When a hwaccel tag is already present it is first written into the packet
 * data; the new tag then always goes into the hwaccel fields.
 * Returns 0 on success, -EINVAL when skb->data is not at the mac header, or
 * the error from __vlan_insert_tag().
 */
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
	if (skb_vlan_tag_present(skb)) {
		int offset = skb->data - skb_mac_header(skb);
		int err;

		if (WARN_ONCE(offset,
			      "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
			      offset)) {
			return -EINVAL;
		}

		err = __vlan_insert_tag(skb, skb->vlan_proto,
					skb_vlan_tag_get(skb));
		if (err)
			return err;

		skb->protocol = skb->vlan_proto;
		skb->mac_len += VLAN_HLEN;

		/* Account for the VLAN_HLEN bytes just inserted after the
		 * two MAC addresses.
		 */
		skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
	}
	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
	return 0;
}
EXPORT_SYMBOL(skb_vlan_push);

/**
 * skb_eth_pop() - Drop the Ethernet header at the head of a packet
 *
 * @skb: Socket buffer to modify
 *
 * Drop the Ethernet header of @skb.
 *
 * Expects that skb->data points to the mac header and that no VLAN tags are
 * present.
 *
 * Returns 0 on success, -errno otherwise.
*/
int skb_eth_pop(struct sk_buff *skb)
{
	/* Reject packets that are too short, VLAN-tagged, or whose network
	 * header starts less than ETH_HLEN bytes past skb->data.
	 */
	if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
	    skb_network_offset(skb) < ETH_HLEN)
		return -EPROTO;

	skb_pull_rcsum(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	return 0;
}
EXPORT_SYMBOL(skb_eth_pop);

/**
 * skb_eth_push() - Add a new Ethernet header at the head of a packet
 *
 * @skb: Socket buffer to modify
 * @dst: Destination MAC address of the new header
 * @src: Source MAC address of the new header
 *
 * Prepend @skb with a new Ethernet header. The ethertype of the new header
 * is taken from skb->protocol.
 *
 * Expects that skb->data points to the mac header, which must be empty.
 *
 * Returns 0 on success, -errno otherwise.
 */
int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
		 const unsigned char *src)
{
	struct ethhdr *eth;
	int err;

	/* skb->data must be at the network header and no hwaccel VLAN tag
	 * may be pending.
	 */
	if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
		return -EPROTO;

	err = skb_cow_head(skb, sizeof(*eth));
	if (err < 0)
		return err;

	skb_push(skb, sizeof(*eth));
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	eth = eth_hdr(skb);
	ether_addr_copy(eth->h_dest, dst);
	ether_addr_copy(eth->h_source, src);
	eth->h_proto = skb->protocol;

	skb_postpush_rcsum(skb, eth, sizeof(*eth));

	return 0;
}
EXPORT_SYMBOL(skb_eth_push);

/* Update the ethertype of hdr and the skb csum value if required. */
static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
			     __be16 ethertype)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* Fold "remove old ethertype, add new one" into skb->csum. */
		__be16 diff[] = { ~hdr->h_proto, ethertype };

		skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
	}

	hdr->h_proto = ethertype;
}

/**
 * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
 *                   the packet
 *
 * @skb: buffer
 * @mpls_lse: MPLS label stack entry to push
 * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
 * @mac_len: length of the MAC header
 * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
 *            ethernet
 *
 * Expects skb->data at mac header.
 *
 * Returns 0 on success, -errno otherwise.
*/
int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
		  int mac_len, bool ethernet)
{
	struct mpls_shim_hdr *lse;
	int err;

	/* Only ethertypes accepted by eth_p_mpls() may be pushed. */
	if (unlikely(!eth_p_mpls(mpls_proto)))
		return -EINVAL;

	/* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
	if (skb->encapsulation)
		return -EINVAL;

	/* Ask for MPLS_HLEN bytes of head space; errors are passed through. */
	err = skb_cow_head(skb, MPLS_HLEN);
	if (unlikely(err))
		return err;

	/*
	 * Only when no inner protocol is recorded yet: remember the current
	 * network offset and protocol as the inner ones.
	 */
	if (!skb->inner_protocol) {
		skb_set_inner_network_header(skb, skb_network_offset(skb));
		skb_set_inner_protocol(skb, skb->protocol);
	}

	/*
	 * Grow the packet by MPLS_HLEN at the front, then slide the MAC
	 * header down by the same amount. memmove() is used because source
	 * and destination can overlap. The gap left after the MAC header is
	 * where the new label stack entry is written below.
	 */
	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		mac_len);

	skb_reset_mac_header(skb);
	skb_set_network_header(skb, mac_len);
	skb_reset_mac_len(skb);

	lse = mpls_hdr(skb);
	lse->label_stack_entry = mpls_lse;
	skb_postpush_rcsum(skb, lse, MPLS_HLEN);

	/* Rewrite the Ethernet ethertype only if a full header is present. */
	if (ethernet && mac_len >= ETH_HLEN)
		skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
	skb->protocol = mpls_proto;

	return 0;
}
EXPORT_SYMBOL_GPL(skb_mpls_push);

/**
 * skb_mpls_pop() - pop the outermost MPLS header
 *
 * @skb: buffer
 * @next_proto: ethertype of header after popped MPLS header
 * @mac_len: length of the MAC header
 * @ethernet: flag to indicate if the packet is ethernet
 *
 * Expects skb->data at mac header.
 *
 * If skb->protocol is not accepted by eth_p_mpls(), the packet is left
 * untouched and 0 is returned.
 *
 * Returns 0 on success, -errno otherwise.
 */
int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
		 bool ethernet)
{
	int err;

	if (unlikely(!eth_p_mpls(skb->protocol)))
		return 0;

	err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	/*
	 * Account for the removed bytes in the checksum first, while
	 * mpls_hdr() still points at them. Then slide the MAC header up over
	 * the MPLS header (overlapping copy) and drop the freed bytes from
	 * the front.
	 */
	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, mac_len);

	if (ethernet && mac_len >= ETH_HLEN) {
		struct ethhdr *hdr;

		/* use mpls_hdr() to get ethertype to account for VLANs. */
		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
		skb_mod_eth_type(skb, hdr, next_proto);
	}
	skb->protocol = next_proto;

	return 0;
}
EXPORT_SYMBOL_GPL(skb_mpls_pop);

/**
 * skb_mpls_update_lse() - modify outermost MPLS header and update csum
 *
 * @skb: buffer
 * @mpls_lse: new MPLS label stack entry to update to
 *
 * Expects skb->data at mac header.
 *
 * Returns -EINVAL if skb->protocol is not accepted by eth_p_mpls().
 *
 * Returns 0 on success, -errno otherwise.
 */
int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
{
	int err;

	if (unlikely(!eth_p_mpls(skb->protocol)))
		return -EINVAL;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	/*
	 * For CHECKSUM_COMPLETE, adjust skb->csum with the complement of the
	 * old entry and the new entry before overwriting it.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };

		skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
	}

	mpls_hdr(skb)->label_stack_entry = mpls_lse;

	return 0;
}
EXPORT_SYMBOL_GPL(skb_mpls_update_lse);

/**
 * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
 *
 * @skb: buffer
 *
 * Expects skb->data at mac header.
 *
 * Returns -EINVAL if skb->protocol is not accepted by eth_p_mpls() or if the
 * decremented TTL is zero, and -ENOMEM if pskb_may_pull() fails.
 *
 * Returns 0 on success, -errno otherwise.
 */
int skb_mpls_dec_ttl(struct sk_buff *skb)
{
	u32 lse;
	u8 ttl;

	if (unlikely(!eth_p_mpls(skb->protocol)))
		return -EINVAL;

	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
		return -ENOMEM;

	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
	ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
	/*
	 * NOTE(review): ttl is a u8, so an incoming TTL of 0 wraps to 255 here
	 * and is not rejected; only an incoming TTL of 1 hits this branch.
	 * Confirm this is intended.
	 */
	if (!--ttl)
		return -EINVAL;

	/* Splice the decremented TTL back into the entry. */
	lse &= ~MPLS_LS_TTL_MASK;
	lse |= ttl << MPLS_LS_TTL_SHIFT;

	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
}
EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);

/**
 * alloc_skb_with_frags - allocate skb with page frags
 *
 * @header_len: size of linear part
 * @data_len: needed length in frags
 * @max_page_order: max page order desired.
 * @errcode: pointer to error code if any
 * @gfp_mask: allocation mask
 *
 * This can be used to allocate a paged skb, given a maximal order for frags.
*/
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
				     unsigned long data_len,
				     int max_page_order,
				     int *errcode,
				     gfp_t gfp_mask)
{
	/* Number of PAGE_SIZE pages needed for data_len, rounded up. */
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	unsigned long chunk;
	struct sk_buff *skb;
	struct page *page;
	int i;

	*errcode = -EMSGSIZE;
	/* Note this test could be relaxed, if we succeed to allocate
	 * high order pages...
	 */
	if (npages > MAX_SKB_FRAGS)
		return NULL;

	/*
	 * From here on every failure reports -ENOBUFS. *errcode is not reset
	 * on success, so callers must test the returned pointer rather than
	 * *errcode.
	 */
	*errcode = -ENOBUFS;
	skb = alloc_skb(header_len, gfp_mask);
	if (!skb)
		return NULL;

	skb->truesize += npages << PAGE_SHIFT;

	/* One frag per iteration; each frag covers 1 << order pages. */
	for (i = 0; npages > 0; i++) {
		int order = max_page_order;

		while (order) {
			if (npages >= 1 << order) {
				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
						   __GFP_COMP |
						   __GFP_NOWARN,
						   order);
				if (page)
					goto fill_page;
				/* Do not retry other high order allocations */
				/*
				 * order = 1 followed by the decrement below
				 * leaves the loop with order == 0, and
				 * max_page_order = 0 makes every later frag
				 * start at order 0 as well.
				 */
				order = 1;
				max_page_order = 0;
			}
			order--;
		}
		/* order is 0 here: fall back to a single page. */
		page = alloc_page(gfp_mask);
		if (!page)
			goto failure;
fill_page:
		/* The last frag may be shorter than the pages backing it. */
		chunk = min_t(unsigned long, data_len,
			      PAGE_SIZE << order);
		skb_fill_page_desc(skb, i, page, 0, chunk);
		data_len -= chunk;
		npages -= 1 << order;
	}
	return skb;

failure:
	kfree_skb(skb);
	return NULL;
}
EXPORT_SYMBOL(alloc_skb_with_frags);

/*
 * carve out the first off bytes from skb when off < headlen
 *
 * A new head buffer is allocated, the linear bytes after @off are copied
 * into it, and the shared info (including the frags array) is copied behind
 * it. Returns 0 on success or -ENOMEM.
 */
static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
				    const int headlen, gfp_t gfp_mask)
{
	int i;
	int size = skb_end_offset(skb);
	int new_hlen = headlen - off;
	u8 *data;

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
		gfp_mask |= __GFP_MEMALLOC;
	data = kmalloc_reserve(size +
			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
			       gfp_mask, NUMA_NO_NODE, NULL);
	if (!data)
		return -ENOMEM;

	/* Use whatever the allocator actually handed out. */
	size = SKB_WITH_OVERHEAD(ksize(data));

	/* Copy real data, and all frags */
	skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
	/*
	 * NOTE(review): skb->len is already reduced here and is not restored
	 * if skb_orphan_frags() fails below. The only caller visible in this
	 * file section frees the skb on error.
	 */
	skb->len -= off;

	memcpy((struct skb_shared_info *)(data + size),
	       skb_shinfo(skb),
	       offsetof(struct skb_shared_info,
			frags[skb_shinfo(skb)->nr_frags]));
	if (skb_cloned(skb)) {
		/* drop the old head gracefully */
		if (skb_orphan_frags(skb, gfp_mask)) {
			kfree(data);
			return -ENOMEM;
		}
		/*
		 * The new head references the same frags and frag list, so
		 * take references before releasing the old data.
		 */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			skb_frag_ref(skb, i);
		if (skb_has_frag_list(skb))
			skb_clone_fraglist(skb);
		skb_release_data(skb);
	} else {
		/* we can reuse existing refcount - all we did was
		 * relocate values
		 */
		skb_free_head(skb);
	}

	/* Switch the skb over to the new head. */
	skb->head = data;
	skb->data = data;
	skb->head_frag = 0;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	skb->end = size;
#else
	skb->end = skb->head + size;
#endif
	skb_set_tail_pointer(skb, skb_headlen(skb));
	skb_headers_offset_update(skb, 0);
	skb->cloned = 0;
	skb->hdr_len = 0;
	skb->nohdr = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);

	return 0;
}

/* Forward declaration: pskb_carve_frag_list() below calls pskb_carve(). */
static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);

/*
 * carve out the first eat bytes from skb's frag_list. May recurse into
 * pskb_carve()
 *
 * Works on shinfo->frag_list of the caller-supplied @shinfo.
 * Returns 0 on success, -EFAULT if the list holds fewer than @eat bytes, or
 * -ENOMEM if cloning or carving a list member fails.
 */
static int pskb_carve_frag_list(struct sk_buff *skb,
				struct skb_shared_info *shinfo, int eat,
				gfp_t gfp_mask)
{
	struct sk_buff *list = shinfo->frag_list;
	struct sk_buff *clone = NULL;
	/* First list member that survives; everything before it is freed. */
	struct sk_buff *insp = NULL;

	do {
		if (!list) {
			pr_err("Not enough bytes to eat. Want %d\n", eat);
			return -EFAULT;
		}
		if (list->len <= eat) {
			/* Eaten as whole. */
			eat -= list->len;
			list = list->next;
			insp = list;
		} else {
			/* Eaten partially. */
			if (skb_shared(list)) {
				/* Carve a private clone, not the shared skb. */
				clone = skb_clone(list, gfp_mask);
				if (!clone)
					return -ENOMEM;
				insp = list->next;
				list = clone;
			} else {
				/* This may be pulled without problems. */
				insp = list;
			}
			if (pskb_carve(list, eat, gfp_mask) < 0) {
				/* clone is NULL when no clone was made. */
				kfree_skb(clone);
				return -ENOMEM;
			}
			break;
		}
	} while (eat);

	/* Free pulled out fragments. */
	while ((list = shinfo->frag_list) != insp) {
		shinfo->frag_list = list->next;
		kfree_skb(list);
	}
	/* And insert new clone at head. */
	if (clone) {
		clone->next = list;
		shinfo->frag_list = clone;
	}
	return 0;
}

/* carve off first len bytes from skb.
Split line (off) is in the
 * non-linear part of skb
 *
 * @pos is the offset at which the first frag starts (pskb_carve() passes the
 * head length). Returns 0 on success or -ENOMEM.
 */
static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
				       int pos, gfp_t gfp_mask)
{
	/* k counts the frags kept in the new shared info. */
	int i, k = 0;
	int size = skb_end_offset(skb);
	u8 *data;
	const int nfrags = skb_shinfo(skb)->nr_frags;
	struct skb_shared_info *shinfo;

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
		gfp_mask |= __GFP_MEMALLOC;
	data = kmalloc_reserve(size +
			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
			       gfp_mask, NUMA_NO_NODE, NULL);
	if (!data)
		return -ENOMEM;

	size = SKB_WITH_OVERHEAD(ksize(data));

	/* Copy the shared info up to, but not including, the frags array. */
	memcpy((struct skb_shared_info *)(data + size),
	       skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
	if (skb_orphan_frags(skb, gfp_mask)) {
		kfree(data);
		return -ENOMEM;
	}
	shinfo = (struct skb_shared_info *)(data + size);
	for (i = 0; i < nfrags; i++) {
		int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		/* Keep every frag that extends past the split line. */
		if (pos + fsize > off) {
			shinfo->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < off) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split it accurately. This is what we do.
				 *
				 * Only the first kept frag can start before
				 * the split line, so k is 0 here and index 0
				 * is the frag just copied.
				 */
				skb_frag_off_add(&shinfo->frags[0], off - pos);
				skb_frag_size_sub(&shinfo->frags[0], off - pos);
			}
			skb_frag_ref(skb, i);
			k++;
		}
		pos += fsize;
	}
	shinfo->nr_frags = k;
	if (skb_has_frag_list(skb))
		skb_clone_fraglist(skb);

	/* split line is in frag list */
	if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
		/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
		if (skb_has_frag_list(skb))
			kfree_skb_list(skb_shinfo(skb)->frag_list);
		kfree(data);
		return -ENOMEM;
	}
	skb_release_data(skb);

	/* Switch the skb over to the new, empty linear area. */
	skb->head = data;
	skb->head_frag = 0;
	skb->data = data;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	skb->end = size;
#else
	skb->end = skb->head + size;
#endif
	skb_reset_tail_pointer(skb);
	skb_headers_offset_update(skb, 0);
	skb->cloned   = 0;
	skb->hdr_len  = 0;
	skb->nohdr    = 0;
	skb->len -= off;
	/* Nothing is linear any more: all remaining bytes are paged data. */
	skb->data_len = skb->len;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;
}

/*
 * remove len bytes from the beginning of the skb
 *
 * Dispatches on whether the split point lies inside the linear head or in
 * the frags / frag list.
 */
static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
{
	int headlen = skb_headlen(skb);

	if (len < headlen)
		return pskb_carve_inside_header(skb, len, headlen, gfp);
	else
		return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
}

/*
 * Extract to_copy bytes starting at off from skb, and return this in
 * a new skb
 *
 * The work is done on a clone of @skb. Returns NULL if cloning, carving or
 * trimming fails; the clone is freed in that case.
 */
struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
			     int to_copy, gfp_t gfp)
{
	struct sk_buff  *clone = skb_clone(skb, gfp);

	if (!clone)
		return NULL;

	if (pskb_carve(clone, off, gfp) < 0 ||
	    pskb_trim(clone, to_copy)) {
		kfree_skb(clone);
		return NULL;
	}
	return clone;
}
EXPORT_SYMBOL(pskb_extract);

/**
 * skb_condense - try to get rid of fragments/frag_list if possible
 * @skb: buffer
 *
 * Can be used to save memory before skb is added to a busy queue.
 * If packet has bytes in frags and enough tail room in skb->head,
 * pull all of them, so that we can free the frags right now and adjust
 * truesize.
 * Notes:
 *	We do not reallocate skb->head thus can not fail.
 *	Caller must re-evaluate skb->truesize if needed.
 */
void skb_condense(struct sk_buff *skb)
{
	if (skb->data_len) {
		/* Give up if the data does not fit or the skb is cloned. */
		if (skb->data_len > skb->end - skb->tail ||
		    skb_cloned(skb))
			return;

		/* Nice, we can free page frag(s) right now */
		__pskb_pull_tail(skb, skb->data_len);
	}
	/* At this point, skb->truesize might be over estimated,
	 * because skb had a fragment, and fragments do not tell
	 * their truesize.
	 * When we pulled its content into skb->head, fragment
	 * was freed, but __pskb_pull_tail() could not possibly
	 * adjust skb->truesize, not knowing the frag truesize.
	 */
	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
}

#ifdef CONFIG_SKB_EXTENSIONS
/*
 * Return the address of extension @id inside @ext. offset[] is stored in
 * units of SKB_EXT_ALIGN_VALUE bytes.
 */
static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
{
	return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
}

/**
 * __skb_ext_alloc - allocate a new skb extensions storage
 *
 * @flags: See kmalloc().
 *
 * Returns the newly allocated pointer, or NULL if the allocation fails. The
 * offset table is zeroed and the refcount starts at 1. The pointer can later
 * be attached to a skb via __skb_ext_set().
 * Note: caller must handle the skb_ext as an opaque data.
 */
struct skb_ext *__skb_ext_alloc(gfp_t flags)
{
	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);

	if (new) {
		memset(new->offset, 0, sizeof(new->offset));
		refcount_set(&new->refcnt, 1);
	}

	return new;
}

/*
 * Return an skb_ext the caller may modify. @old itself is returned when its
 * refcount is 1. Otherwise a copy with refcount 1 is made with GFP_ATOMIC
 * and the caller's reference on @old is dropped. Returns NULL if the copy
 * cannot be allocated; @old is left untouched in that case.
 */
static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
					 unsigned int old_active)
{
	struct skb_ext *new;

	if (refcount_read(&old->refcnt) == 1)
		return old;

	new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
	if (!new)
		return NULL;

	/* chunks counts SKB_EXT_ALIGN_VALUE-sized units in use. */
	memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
	refcount_set(&new->refcnt, 1);

#ifdef CONFIG_XFRM
	/* The copy references the same xfrm states: take a hold on each. */
	if (old_active & (1 << SKB_EXT_SEC_PATH)) {
		struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
		unsigned int i;

		for (i = 0; i < sp->len; i++)
			xfrm_state_hold(sp->xvec[i]);
	}
#endif
	__skb_ext_put(old);
	return new;
}

/**
 * __skb_ext_set - attach the specified extension storage to this skb
 * @skb: buffer
 * @id: extension id
 * @ext: extension storage previously allocated via __skb_ext_alloc()
 *
 * Existing extensions, if any, are cleared.
 *
 * Returns the pointer to the extension.
*/
void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
		    struct skb_ext *ext)
{
	/* The extension is placed right after the skb_ext header. */
	unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);

	/* Release whatever extension storage the skb had before. */
	skb_ext_put(skb);
	newlen = newoff + skb_ext_type_len[id];
	ext->chunks = newlen;
	ext->offset[id] = newoff;
	skb->extensions = ext;
	/* Plain assignment: @id becomes the only active extension. */
	skb->active_extensions = 1 << id;
	return skb_ext_get_ptr(ext, id);
}

/**
 * skb_ext_add - allocate space for given extension, COW if needed
 * @skb: buffer
 * @id: extension to allocate space for
 *
 * Allocates enough space for the given extension.
 * If the extension is already present, a pointer to that extension
 * is returned.
 *
 * If the skb was cloned, COW applies and the returned memory can be
 * modified without changing the extension space of cloned buffers.
 *
 * Allocations are made with GFP_ATOMIC.
 *
 * Returns pointer to the extension or NULL on allocation failure.
 */
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
{
	struct skb_ext *new, *old = NULL;
	unsigned int newlen, newoff;

	if (skb->active_extensions) {
		old = skb->extensions;

		new = skb_ext_maybe_cow(old, skb->active_extensions);
		if (!new)
			return NULL;

		/* Storage for @id already exists: only mark it active. */
		if (__skb_ext_exist(new, id))
			goto set_active;

		/* Append after the chunks already in use. */
		newoff = new->chunks;
	} else {
		/*
		 * new is not assigned yet at this point.
		 * NOTE(review): presumably SKB_EXT_CHUNKSIZEOF() only takes
		 * the size of its argument and never evaluates it; confirm
		 * against the macro definition.
		 */
		newoff = SKB_EXT_CHUNKSIZEOF(*new);

		new = __skb_ext_alloc(GFP_ATOMIC);
		if (!new)
			return NULL;
	}

	newlen = newoff + skb_ext_type_len[id];
	new->chunks = newlen;
	new->offset[id] = newoff;
set_active:
	skb->extensions = new;
	skb->active_extensions |= 1 << id;
	return skb_ext_get_ptr(new, id);
}
EXPORT_SYMBOL(skb_ext_add);

#ifdef CONFIG_XFRM
/* Drop one reference on each of the sp->len xfrm states held in @sp. */
static void skb_ext_put_sp(struct sec_path *sp)
{
	unsigned int i;

	for (i = 0; i < sp->len; i++)
		xfrm_state_put(sp->xvec[i]);
}
#endif

/*
 * Deactivate extension @id on @skb. When no extension remains active, the
 * storage reference is dropped and skb->extensions is cleared. Otherwise,
 * with CONFIG_XFRM, removing the sec_path from storage whose refcount is 1
 * releases its xfrm states and resets its length.
 */
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
	struct skb_ext *ext = skb->extensions;

	skb->active_extensions &= ~(1 << id);
	if (skb->active_extensions == 0) {
		skb->extensions = NULL;
		__skb_ext_put(ext);
#ifdef CONFIG_XFRM
	} else if (id == SKB_EXT_SEC_PATH &&
		   refcount_read(&ext->refcnt) == 1) {
		struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);

		skb_ext_put_sp(sp);
		sp->len = 0;
#endif
	}
}
EXPORT_SYMBOL(__skb_ext_del);

/*
 * Drop one reference on @ext and free it when that was the last one. With
 * CONFIG_XFRM, the xfrm states of a sec_path extension are released before
 * the storage goes back to skbuff_ext_cache.
 */
void __skb_ext_put(struct skb_ext *ext)
{
	/* If this is last clone, nothing can increment
	 * it after check passes.  Avoids one atomic op.
	 */
	if (refcount_read(&ext->refcnt) == 1)
		goto free_now;

	if (!refcount_dec_and_test(&ext->refcnt))
		return;
free_now:
#ifdef CONFIG_XFRM
	if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
		skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
#endif

	kmem_cache_free(skbuff_ext_cache, ext);
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
#ifndef _LINUX_HASH_H
#define _LINUX_HASH_H
/* Fast hashing routine for ints, longs and pointers.
   (C) 2002 Nadia Yvette Chambers, IBM */

#include <asm/types.h>
#include <linux/compiler.h>

/*
 * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and
 * fs/inode.c.  It's not actually prime any more (the previous primes
 * were actively bad for hashing), but the name remains.
 *
 * hash_long() and GOLDEN_RATIO_PRIME resolve to the 32-bit or 64-bit
 * variant according to BITS_PER_LONG; any other word size is a build error.
 */
#if BITS_PER_LONG == 32
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
#define hash_long(val, bits) hash_32(val, bits)
#elif BITS_PER_LONG == 64
#define hash_long(val, bits) hash_64(val, bits)
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
#else
#error Wordsize not 32 or 64
#endif

/*
 * This hash multiplies the input by a large odd number and takes the
 * high bits.  Since multiplication propagates changes to the most
 * significant end only, it is essential that the high bits of the
 * product be used for the hash value.
 *
 * Chuck Lever verified the effectiveness of this technique:
 * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
 *
 * Although a random odd number will do, it turns out that the golden
 * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
 * properties.  (See Knuth vol 3, section 6.4, exercise 9.)
 *
 * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
 * which is very slightly easier to multiply by and makes no
 * difference to the hash distribution.
*/ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull #ifdef CONFIG_HAVE_ARCH_HASH /* This header may use the GOLDEN_RATIO_xx constants */ #include <asm/hash.h> #endif /* * The _generic versions exist only so lib/test_hash.c can compare * the arch-optimized versions with the generic. * * Note that if you change these, any <asm/hash.h> that aren't updated * to match need to have their HAVE_ARCH_* define values updated so the * self-test will not false-positive. */ #ifndef HAVE_ARCH__HASH_32 #define __hash_32 __hash_32_generic #endif static inline u32 __hash_32_generic(u32 val) { return val * GOLDEN_RATIO_32; } #ifndef HAVE_ARCH_HASH_32 #define hash_32 hash_32_generic #endif static inline u32 hash_32_generic(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); } #ifndef HAVE_ARCH_HASH_64 #define hash_64 hash_64_generic #endif static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) { #if BITS_PER_LONG == 64 /* 64x64-bit multiply is efficient on all 64-bit processors */ return val * GOLDEN_RATIO_64 >> (64 - bits); #else /* Hash 64 bits using only 32x32-bit multiply. */ return hash_32((u32)val ^ __hash_32(val >> 32), bits); #endif } static inline u32 hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } /* This really should be called fold32_ptr; it does no hashing to speak of. */ static inline u32 hash32_ptr(const void *ptr) { unsigned long val = (unsigned long)ptr; #if BITS_PER_LONG == 64 val ^= (val >> 32); #endif return (u32)val; } #endif /* _LINUX_HASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 /* SPDX-License-Identifier: GPL-2.0 */ /* * Security server interface. 
* * Author : Stephen Smalley, <sds@tycho.nsa.gov> * */ #ifndef _SELINUX_SECURITY_H_ #define _SELINUX_SECURITY_H_ #include <linux/compiler.h> #include <linux/dcache.h> #include <linux/magic.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include "flask.h" #include "policycap.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ #define SECSID_WILD 0xffffffff /* wildcard SID */ #define SECCLASS_NULL 0x0000 /* no class */ /* Identify specific policy version changes */ #define POLICYDB_VERSION_BASE 15 #define POLICYDB_VERSION_BOOL 16 #define POLICYDB_VERSION_IPV6 17 #define POLICYDB_VERSION_NLCLASS 18 #define POLICYDB_VERSION_VALIDATETRANS 19 #define POLICYDB_VERSION_MLS 19 #define POLICYDB_VERSION_AVTAB 20 #define POLICYDB_VERSION_RANGETRANS 21 #define POLICYDB_VERSION_POLCAP 22 #define POLICYDB_VERSION_PERMISSIVE 23 #define POLICYDB_VERSION_BOUNDARY 24 #define POLICYDB_VERSION_FILENAME_TRANS 25 #define POLICYDB_VERSION_ROLETRANS 26 #define POLICYDB_VERSION_NEW_OBJECT_DEFAULTS 27 #define POLICYDB_VERSION_DEFAULT_TYPE 28 #define POLICYDB_VERSION_CONSTRAINT_NAMES 29 #define POLICYDB_VERSION_XPERMS_IOCTL 30 #define POLICYDB_VERSION_INFINIBAND 31 #define POLICYDB_VERSION_GLBLUB 32 #define POLICYDB_VERSION_COMP_FTRANS 33 /* compressed filename transitions */ /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #define POLICYDB_VERSION_MAX POLICYDB_VERSION_COMP_FTRANS /* Mask for just the mount related flags */ #define SE_MNTMASK 0x0f /* Super block security struct flags for mount options */ /* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 #define FSCONTEXT_MNT 0x02 #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 #define SBLABEL_MNT 0x10 /* Non-mount related flags */ #define SE_SBINITIALIZED 0x0100 #define SE_SBPROC 0x0200 #define SE_SBGENFS 0x0400 #define SE_SBGENFS_XATTR 0x0800 #define CONTEXT_STR 
"context" #define FSCONTEXT_STR "fscontext" #define ROOTCONTEXT_STR "rootcontext" #define DEFCONTEXT_STR "defcontext" #define SECLABEL_STR "seclabel" struct netlbl_lsm_secattr; extern int selinux_enabled_boot; /* * type_datum properties * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY */ #define TYPEDATUM_PROPERTY_PRIMARY 0x0001 #define TYPEDATUM_PROPERTY_ATTRIBUTE 0x0002 /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 struct selinux_avc; struct selinux_policy; struct selinux_state { #ifdef CONFIG_SECURITY_SELINUX_DISABLE bool disabled; #endif #ifdef CONFIG_SECURITY_SELINUX_DEVELOP bool enforcing; #endif bool checkreqprot; bool initialized; bool policycap[__POLICYDB_CAPABILITY_MAX]; struct page *status_page; struct mutex status_lock; struct selinux_avc *avc; struct selinux_policy __rcu *policy; struct mutex policy_mutex; } __randomize_layout; void selinux_avc_init(struct selinux_avc **avc); extern struct selinux_state selinux_state; static inline bool selinux_initialized(const struct selinux_state *state) { /* do a synchronized load to avoid race conditions */ return smp_load_acquire(&state->initialized); } static inline void selinux_mark_initialized(struct selinux_state *state) { /* do a synchronized write to avoid race conditions */ smp_store_release(&state->initialized, true); } #ifdef CONFIG_SECURITY_SELINUX_DEVELOP static inline bool enforcing_enabled(struct selinux_state *state) { return READ_ONCE(state->enforcing); } static inline void enforcing_set(struct selinux_state *state, bool value) { WRITE_ONCE(state->enforcing, value); } #else static inline bool enforcing_enabled(struct selinux_state *state) { return true; } static inline void enforcing_set(struct selinux_state *state, bool value) { } #endif static inline bool checkreqprot_get(const struct selinux_state *state) { return READ_ONCE(state->checkreqprot); } static inline void checkreqprot_set(struct selinux_state *state, bool value) { 
WRITE_ONCE(state->checkreqprot, value); } #ifdef CONFIG_SECURITY_SELINUX_DISABLE static inline bool selinux_disabled(struct selinux_state *state) { return READ_ONCE(state->disabled); } static inline void selinux_mark_disabled(struct selinux_state *state) { WRITE_ONCE(state->disabled, true); } #else static inline bool selinux_disabled(struct selinux_state *state) { return false; } #endif static inline bool selinux_policycap_netpeer(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NETPEER]); } static inline bool selinux_policycap_openperm(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_OPENPERM]); } static inline bool selinux_policycap_extsockclass(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]); } static inline bool selinux_policycap_alwaysnetwork(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]); } static inline bool selinux_policycap_cgroupseclabel(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]); } static inline bool selinux_policycap_nnp_nosuid_transition(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]); } static inline bool selinux_policycap_genfs_seclabel_symlinks(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } struct selinux_policy_convert_data; struct selinux_load_state { struct selinux_policy *policy; struct selinux_policy_convert_data *convert_data; }; int security_mls_enabled(struct selinux_state *state); int security_load_policy(struct selinux_state *state, void *data, size_t len, struct selinux_load_state *load_state); void 
selinux_policy_commit(struct selinux_state *state, struct selinux_load_state *load_state); void selinux_policy_cancel(struct selinux_state *state, struct selinux_load_state *load_state); int security_read_policy(struct selinux_state *state, void **data, size_t *len); int security_policycap_supported(struct selinux_state *state, unsigned int req_cap); #define SEL_VEC_MAX 32 struct av_decision { u32 allowed; u32 auditallow; u32 auditdeny; u32 seqno; u32 flags; }; #define XPERMS_ALLOWED 1 #define XPERMS_AUDITALLOW 2 #define XPERMS_DONTAUDIT 4 #define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f)) #define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f))) struct extended_perms_data { u32 p[8]; }; struct extended_perms_decision { u8 used; u8 driver; struct extended_perms_data *allowed; struct extended_perms_data *auditallow; struct extended_perms_data *dontaudit; }; struct extended_perms { u16 len; /* length associated decision chain */ struct extended_perms_data drivers; /* flag drivers that are used */ }; /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 void security_compute_av(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd, struct extended_perms *xperms); void security_compute_xperms_decision(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u8 driver, struct extended_perms_decision *xpermd); void security_compute_av_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd); int security_transition_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); int security_transition_sid_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid); int security_member_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int security_change_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int 
security_sid_to_context(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_force(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_inval(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_context_to_sid(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, gfp_t gfp); int security_context_str_to_sid(struct selinux_state *state, const char *scontext, u32 *out_sid, gfp_t gfp); int security_context_to_sid_default(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); int security_context_to_sid_force(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *sid); int security_get_user_sids(struct selinux_state *state, u32 callsid, char *username, u32 **sids, u32 *nel); int security_port_sid(struct selinux_state *state, u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(struct selinux_state *state, u64 subnet_prefix, u16 pkey_num, u32 *out_sid); int security_ib_endport_sid(struct selinux_state *state, const char *dev_name, u8 port_num, u32 *out_sid); int security_netif_sid(struct selinux_state *state, char *name, u32 *if_sid); int security_node_sid(struct selinux_state *state, u16 domain, void *addr, u32 addrlen, u32 *out_sid); int security_validate_transition(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_validate_transition_user(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_bounded_transition(struct selinux_state *state, u32 oldsid, u32 newsid); int security_sid_mls_copy(struct selinux_state *state, u32 sid, u32 mls_sid, u32 *new_sid); int security_net_peersid_resolve(struct selinux_state *state, u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid); int security_get_classes(struct selinux_policy *policy, char 
***classes, int *nclasses); int security_get_permissions(struct selinux_policy *policy, char *class, char ***perms, int *nperms); int security_get_reject_unknown(struct selinux_state *state); int security_get_allow_unknown(struct selinux_state *state); #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */ #define SECURITY_FS_USE_GENFS 4 /* use the genfs support */ #define SECURITY_FS_USE_NONE 5 /* no labeling support */ #define SECURITY_FS_USE_MNTPOINT 6 /* use mountpoint labeling */ #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ int security_fs_use(struct selinux_state *state, struct super_block *sb); int security_genfs_sid(struct selinux_state *state, const char *fstype, char *name, u16 sclass, u32 *sid); int selinux_policy_genfs_sid(struct selinux_policy *policy, const char *fstype, char *name, u16 sclass, u32 *sid); #ifdef CONFIG_NETLABEL int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid); int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr); #else static inline int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid) { return -EIDRM; } static inline int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr) { return -ENOENT; } #endif /* CONFIG_NETLABEL */ const char *security_get_initial_sid_context(u32 sid); /* * status notifier using mmap interface */ extern struct page *selinux_kernel_status_page(struct selinux_state *state); #define SELINUX_KERNEL_STATUS_VERSION 1 struct selinux_kernel_status { u32 version; /* version number of thie structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current 
setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ /* * The version > 0 supports above members. */ } __packed; extern void selinux_status_update_setenforce(struct selinux_state *state, int enforcing); extern void selinux_status_update_policyload(struct selinux_state *state, int seqno); extern void selinux_complete_init(void); extern int selinux_disable(struct selinux_state *state); extern void exit_sel_fs(void); extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern void avtab_cache_init(void); extern void ebitmap_cache_init(void); extern void hashtab_cache_init(void); extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); #endif /* _SELINUX_SECURITY_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM percpu #if !defined(_TRACE_PERCPU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PERCPU_H #include <linux/tracepoint.h> TRACE_EVENT(percpu_alloc_percpu, TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align, void *base_addr, int off, void __percpu *ptr), TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr), TP_STRUCT__entry( __field( bool, reserved ) __field( bool, is_atomic ) __field( size_t, size ) __field( size_t, align ) __field( void *, base_addr ) __field( int, off ) __field( void __percpu *, ptr ) ), TP_fast_assign( __entry->reserved = reserved; __entry->is_atomic = is_atomic; __entry->size = size; __entry->align = align; __entry->base_addr = base_addr; __entry->off = off; __entry->ptr = ptr; ), TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p", __entry->reserved, __entry->is_atomic, __entry->size, __entry->align, __entry->base_addr, __entry->off, __entry->ptr) ); TRACE_EVENT(percpu_free_percpu, TP_PROTO(void *base_addr, int off, void __percpu *ptr), TP_ARGS(base_addr, off, ptr), TP_STRUCT__entry( __field( void *, base_addr ) __field( int, off ) __field( void __percpu *, ptr ) ), TP_fast_assign( __entry->base_addr = base_addr; __entry->off = off; __entry->ptr = ptr; ), TP_printk("base_addr=%p off=%d ptr=%p", __entry->base_addr, __entry->off, __entry->ptr) ); TRACE_EVENT(percpu_alloc_percpu_fail, TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align), TP_ARGS(reserved, is_atomic, size, align), TP_STRUCT__entry( __field( 
bool, reserved ) __field( bool, is_atomic ) __field( size_t, size ) __field( size_t, align ) ), TP_fast_assign( __entry->reserved = reserved; __entry->is_atomic = is_atomic; __entry->size = size; __entry->align = align; ), TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu", __entry->reserved, __entry->is_atomic, __entry->size, __entry->align) ); TRACE_EVENT(percpu_create_chunk, TP_PROTO(void *base_addr), TP_ARGS(base_addr), TP_STRUCT__entry( __field( void *, base_addr ) ), TP_fast_assign( __entry->base_addr = base_addr; ), TP_printk("base_addr=%p", __entry->base_addr) ); TRACE_EVENT(percpu_destroy_chunk, TP_PROTO(void *base_addr), TP_ARGS(base_addr), TP_STRUCT__entry( __field( void *, base_addr ) ), TP_fast_assign( __entry->base_addr = base_addr; ), TP_printk("base_addr=%p", __entry->base_addr) ); #endif /* _TRACE_PERCPU_H */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 /* SPDX-License-Identifier: GPL-2.0 */ /* thread_info.h: common low-level thread information accessors * * Copyright (C) 2002 David Howells (dhowells@redhat.com) * - Incorporating suggestions made by Linus Torvalds */ #ifndef _LINUX_THREAD_INFO_H #define _LINUX_THREAD_INFO_H #include <linux/types.h> #include <linux/bug.h> #include <linux/restart_block.h> #include <linux/errno.h> #ifdef CONFIG_THREAD_INFO_IN_TASK /* * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels, * including <asm/current.h> can cause a circular dependency on some platforms. */ #include <asm/current.h> #define current_thread_info() ((struct thread_info *)current) #endif #include <linux/bitops.h> /* * For per-arch arch_within_stack_frames() implementations, defined in * asm/thread_info.h. 
*/ enum { BAD_STACK = -1, NOT_STACK = 0, GOOD_FRAME, GOOD_STACK, }; #include <asm/thread_info.h> #ifdef __KERNEL__ #ifndef arch_set_restart_data #define arch_set_restart_data(restart) do { } while (0) #endif static inline long set_restart_fn(struct restart_block *restart, long (*fn)(struct restart_block *)) { restart->fn = fn; arch_set_restart_data(restart); return -ERESTART_RESTARTBLOCK; } #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif #define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) /* * flag set/clear/test wrappers * - pass TIF_xxxx constants to these functions */ static inline void set_ti_thread_flag(struct thread_info *ti, int flag) { set_bit(flag, (unsigned long *)&ti->flags); } static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) { clear_bit(flag, (unsigned long *)&ti->flags); } static inline void update_ti_thread_flag(struct thread_info *ti, int flag, bool value) { if (value) set_ti_thread_flag(ti, flag); else clear_ti_thread_flag(ti, flag); } static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { return test_and_set_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) { return test_and_clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_ti_thread_flag(struct thread_info *ti, int flag) { return test_bit(flag, (unsigned long *)&ti->flags); } #define set_thread_flag(flag) \ set_ti_thread_flag(current_thread_info(), flag) #define clear_thread_flag(flag) \ clear_ti_thread_flag(current_thread_info(), flag) #define update_thread_flag(flag, value) \ update_ti_thread_flag(current_thread_info(), flag, value) #define test_and_set_thread_flag(flag) \ test_and_set_ti_thread_flag(current_thread_info(), flag) #define test_and_clear_thread_flag(flag) \ test_and_clear_ti_thread_flag(current_thread_info(), flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) #define 
tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, const void *obj, unsigned long len) { return 0; } #endif #ifdef CONFIG_HARDENED_USERCOPY extern void __check_object_size(const void *ptr, unsigned long n, bool to_user); static __always_inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { if (!__builtin_constant_p(n)) __check_object_size(ptr, n, to_user); } #else static inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { } #endif /* CONFIG_HARDENED_USERCOPY */ extern void __compiletime_error("copy source size is too small") __bad_copy_from(void); extern void __compiletime_error("copy destination size is too small") __bad_copy_to(void); static inline void copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } static __always_inline __must_check bool check_copy_size(const void *addr, size_t bytes, bool is_source) { int sz = __compiletime_object_size(addr); if (unlikely(sz >= 0 && sz < bytes)) { if (!__builtin_constant_p(bytes)) copy_overflow(sz, bytes); else if (is_source) __bad_copy_from(); else __bad_copy_to(); return false; } if (WARN_ON_ONCE(bytes > INT_MAX)) return false; check_object_size(addr, bytes, is_source); return true; } #ifndef arch_setup_new_exec static inline void arch_setup_new_exec(void) { } #endif #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 /* SPDX-License-Identifier: GPL-2.0 */ /* * Generic nexthop implementation * * Copyright (c) 2017-19 Cumulus Networks * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> */ #ifndef __LINUX_NEXTHOP_H #define __LINUX_NEXTHOP_H #include 
<linux/netdevice.h> #include <linux/notifier.h> #include <linux/route.h> #include <linux/types.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/netlink.h> #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK struct nexthop; struct nh_config { u32 nh_id; u8 nh_family; u8 nh_protocol; u8 nh_blackhole; u8 nh_fdb; u32 nh_flags; int nh_ifindex; struct net_device *dev; union { __be32 ipv4; struct in6_addr ipv6; } gw; struct nlattr *nh_grp; u16 nh_grp_type; struct nlattr *nh_encap; u16 nh_encap_type; u32 nlflags; struct nl_info nlinfo; }; struct nh_info { struct hlist_node dev_hash; /* entry on netns devhash */ struct nexthop *nh_parent; u8 family; bool reject_nh; bool fdb_nh; union { struct fib_nh_common fib_nhc; struct fib_nh fib_nh; struct fib6_nh fib6_nh; }; }; struct nh_grp_entry { struct nexthop *nh; u8 weight; atomic_t upper_bound; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ }; struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool mpath; bool fdb_nh; bool has_v4; struct nh_grp_entry nh_entries[]; }; struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; u32 id; u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; refcount_t refcnt; struct rcu_head rcu; union { struct nh_info __rcu *nh_info; struct nh_group __rcu *nh_grp; }; }; enum nexthop_event_type { NEXTHOP_EVENT_DEL }; int register_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); void nexthop_free_rcu(struct rcu_head *head); static 
inline bool nexthop_get(struct nexthop *nh) { return refcount_inc_not_zero(&nh->refcnt); } static inline void nexthop_put(struct nexthop *nh) { if (refcount_dec_and_test(&nh->refcnt)) call_rcu(&nh->rcu, nexthop_free_rcu); } static inline bool nexthop_cmp(const struct nexthop *nh1, const struct nexthop *nh2) { return nh1 == nh2; } static inline bool nexthop_is_fdb(const struct nexthop *nh) { if (nh->is_group) { const struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->fdb_nh; } else { const struct nh_info *nhi; nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->fdb_nh; } } static inline bool nexthop_has_v4(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->has_v4; } return false; } static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->mpath; } return false; } struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); static inline unsigned int nexthop_num_path(const struct nexthop *nh) { unsigned int rc = 1; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) rc = nh_grp->num_nh; } return rc; } static inline struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) { /* for_nexthops macros in fib_semantics.c grabs a pointer to * the nexthop before checking nhsel */ if (nhsel >= nhg->num_nh) return NULL; return nhg->nh_entries[nhsel].nh; } static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; if (fib_add_nexthop(skb, nhc, weight, rt_family, 
0) < 0) return -EMSGSIZE; } return 0; } /* called with rcu lock */ static inline bool nexthop_is_blackhole(const struct nexthop *nh) { const struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->num_nh > 1) return false; nh = nh_grp->nh_entries[0].nh; } nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->reject_nh; } static inline void nexthop_path_fib_result(struct fib_result *res, int hash) { struct nh_info *nhi; struct nexthop *nh; nh = nexthop_select_path(res->fi->nh, hash); nhi = rcu_dereference(nh->nh_info); res->nhc = &nhi->fib_nhc; } /* called with rcu read lock or rtnl held */ static inline struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) { struct nh_info *nhi; BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; } } nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } /* called from fib_table_lookup with rcu_lock */ static inline struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, int fib_flags, const struct flowi4 *flp, int *nhsel) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = i; return &nhi->fib_nhc; } } } else { nhi = rcu_dereference(nh->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = 0; return &nhi->fib_nhc; } } return NULL; } static inline bool nexthop_uses_dev(const struct nexthop *nh, const struct net_device *dev) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < 
nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } } else { nhi = rcu_dereference(nh->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } return false; } static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) return nexthop_num_path(fi->nh); return fi->fib_nhs; } int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack); static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) { if (unlikely(fi->nh)) return nexthop_fib_nhc(fi->nh, nhsel); return &fi->fib_nh[nhsel].nh_common; } /* only used when fib_nh is built into fib_info */ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) { WARN_ON(fi->nh); return &fi->fib_nh[nhsel]; } /* * IPv6 variants */ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack); /* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } /* Variant of nexthop_fib6_nh(). * Caller should either hold rcu_read_lock_bh(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_bh_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; fib6_nh = f6i->nh ? 
nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; return fib6_nh->fib_nh_dev; } static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) { struct nexthop *nh = res->f6i->nh; struct nh_info *nhi; nh = nexthop_select_path(nh, hash); nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->reject_nh) { res->fib6_type = RTN_BLACKHOLE; res->fib6_flags |= RTF_REJECT; res->nh = nexthop_fib6_nh(nh); } else { res->nh = &nhi->fib6_nh; } } int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); static inline int nexthop_get_family(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->family; } static inline struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, int hash) { struct nh_info *nhi; struct nexthop *nhp; nhp = nexthop_select_path(nh, hash); if (unlikely(!nhp)) return NULL; nhi = rcu_dereference(nhp->nh_info); return &nhi->fib_nhc; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_H #define _ASM_X86_INAT_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ #include <asm/inat_types.h> /* * Internal bits. Don't use bitmasks directly, because these bits are * unstable. You should use checking functions. 
*/ #define INAT_OPCODE_TABLE_SIZE 256 #define INAT_GROUP_TABLE_SIZE 8 /* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ /* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ #define INAT_PFX_DS 6 /* 0x3E */ #define INAT_PFX_ES 7 /* 0x26 */ #define INAT_PFX_FS 8 /* 0x64 */ #define INAT_PFX_GS 9 /* 0x65 */ #define INAT_PFX_SS 10 /* 0x36 */ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ /* x86-64 REX prefix */ #define INAT_PFX_REX 12 /* 0x4X */ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 /* Immediate size */ #define INAT_IMM_BYTE 1 #define INAT_IMM_WORD 2 #define INAT_IMM_DWORD 3 #define INAT_IMM_QWORD 4 #define INAT_IMM_PTR 5 #define INAT_IMM_VWORD32 6 #define INAT_IMM_VWORD 7 /* Legacy prefix */ #define INAT_PFX_OFFS 0 #define INAT_PFX_BITS 4 #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) /* Escape opcodes */ #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) #define INAT_ESC_BITS 2 #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) /* Group opcodes (1-16) */ #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) #define INAT_GRP_BITS 5 #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) /* Immediates */ #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) #define INAT_IMM_BITS 3 #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) /* Flags */ #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) #define INAT_MOFFSET (1 << 
(INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) /* Identifiers for segment registers */ #define INAT_SEG_REG_IGNORE 0 #define INAT_SEG_REG_DEFAULT 1 #define INAT_SEG_REG_CS 2 #define INAT_SEG_REG_SS 3 #define INAT_SEG_REG_DS 4 #define INAT_SEG_REG_ES 5 #define INAT_SEG_REG_FS 6 #define INAT_SEG_REG_GS 7 /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); extern int inat_get_last_prefix_id(insn_byte_t last_pfx); extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_pp); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; return attr && attr <= INAT_LGCPFX_MAX; } static inline int inat_is_address_size_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; } static inline int inat_is_operand_size_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; } static inline int inat_is_rex_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_REX; } static inline int inat_last_prefix_id(insn_attr_t attr) { if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) return 0; else return attr & INAT_PFX_MASK; } static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || attr == 
INAT_PFX_EVEX; } static inline int inat_is_evex_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; } static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; } static inline int inat_escape_id(insn_attr_t attr) { return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; } static inline int inat_is_group(insn_attr_t attr) { return attr & INAT_GRP_MASK; } static inline int inat_group_id(insn_attr_t attr) { return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; } static inline int inat_group_common_attribute(insn_attr_t attr) { return attr & ~INAT_GRP_MASK; } static inline int inat_has_immediate(insn_attr_t attr) { return attr & INAT_IMM_MASK; } static inline int inat_immediate_size(insn_attr_t attr) { return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; } static inline int inat_has_modrm(insn_attr_t attr) { return attr & INAT_MODRM; } static inline int inat_is_force64(i