1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 // SPDX-License-Identifier: GPL-2.0-only /* * Lock-less NULL terminated single linked list * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the * list can NOT be used in NMI handlers. So code that uses the list in * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/llist.h> /** * llist_add_batch - add several linked entries in batch * @new_first: first entry in batch to be added * @new_last: last entry in batch to be added * @head: the head for your lock-less list * * Return whether list is empty before adding. */ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { struct llist_node *first; do { new_last->next = first = READ_ONCE(head->first); } while (cmpxchg(&head->first, first, new_first) != first); return !first; } EXPORT_SYMBOL_GPL(llist_add_batch); /** * llist_del_first - delete the first entry of lock-less list * @head: the head for your lock-less list * * If list is empty, return NULL, otherwise, return the first entry * deleted, this is the newest added one. * * Only one llist_del_first user can be used simultaneously with * multiple llist_add users without lock. Because otherwise * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, * llist_add) sequence in another user may change @head->first->next, * but keep @head->first. If multiple consumers are needed, please * use llist_del_all or use lock between consumers. */ struct llist_node *llist_del_first(struct llist_head *head) { struct llist_node *entry, *old_entry, *next; entry = smp_load_acquire(&head->first); for (;;) { if (entry == NULL) return NULL; old_entry = entry; next = READ_ONCE(entry->next); entry = cmpxchg(&head->first, old_entry, next); if (entry == old_entry) break; } return entry; } EXPORT_SYMBOL_GPL(llist_del_first); /** * llist_reverse_order - reverse order of a llist chain * @head: first item of the list to be reversed * * Reverse the order of a chain of llist entries and return the * new first entry. */ struct llist_node *llist_reverse_order(struct llist_node *head) { struct llist_node *new_head = NULL; while (head) { struct llist_node *tmp = head; head = head->next; tmp->next = new_head; new_head = tmp; } return new_head; } EXPORT_SYMBOL_GPL(llist_reverse_order);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SCSI_DISK_H #define _SCSI_DISK_H /* * More than enough for everybody ;) The huge number of majors * is a leftover from 16bit dev_t days, we don't really need that * much numberspace. */ #define SD_MAJORS 16 /* * Time out in seconds for disks and Magneto-opticals (which are slower). */ #define SD_TIMEOUT (30 * HZ) #define SD_MOD_TIMEOUT (75 * HZ) /* * Flush timeout is a multiplier over the standard device timeout which is * user modifiable via sysfs but initially set to SD_TIMEOUT */ #define SD_FLUSH_TIMEOUT_MULTIPLIER 2 #define SD_WRITE_SAME_TIMEOUT (120 * HZ) /* * Number of allowed retries */ #define SD_MAX_RETRIES 5 #define SD_PASSTHROUGH_RETRIES 1 #define SD_MAX_MEDIUM_TIMEOUTS 2 /* * Size of the initial data buffer for mode and read capacity data */ #define SD_BUF_SIZE 512 /* * Number of sectors at the end of the device to avoid multi-sector * accesses to in the case of last_sector_bug */ #define SD_LAST_BUGGY_SECTORS 8 enum { SD_EXT_CDB_SIZE = 32, /* Extended CDB size */ SD_MEMPOOL_SIZE = 2, /* CDB pool size */ }; enum { SD_DEF_XFER_BLOCKS = 0xffff, SD_MAX_XFER_BLOCKS = 0xffffffff, SD_MAX_WS10_BLOCKS = 0xffff, SD_MAX_WS16_BLOCKS = 0x7fffff, }; enum { SD_LBP_FULL = 0, /* Full logical block provisioning */ SD_LBP_UNMAP, /* Use UNMAP command */ SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */ SD_LBP_WS10, /* Use WRITE SAME(10) with UNMAP bit */ SD_LBP_ZERO, /* Use WRITE SAME(10) with zero payload */ SD_LBP_DISABLE, /* Discard disabled due to failed cmd */ }; enum { SD_ZERO_WRITE = 0, /* Use WRITE(10/16) command */ SD_ZERO_WS, /* Use WRITE SAME(10/16) command */ SD_ZERO_WS16_UNMAP, /* Use WRITE SAME(16) with UNMAP */ SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ }; struct scsi_disk { struct scsi_driver *driver; /* always &sd_template */ struct scsi_device *device; struct device dev; struct gendisk *disk; struct opal_dev *opal_dev; #ifdef CONFIG_BLK_DEV_ZONED u32 nr_zones; u32 rev_nr_zones; u32 zone_blocks; u32 rev_zone_blocks; u32 zones_optimal_open; u32 zones_optimal_nonseq; u32 zones_max_open; u32 *zones_wp_offset; spinlock_t zones_wp_offset_lock; u32 *rev_wp_offset; struct mutex rev_mutex; struct work_struct zone_wp_offset_work; char *zone_wp_update_buf; #endif atomic_t openers; sector_t capacity; /* size in logical blocks */ int max_retries; u32 max_xfer_blocks; u32 opt_xfer_blocks; u32 max_ws_blocks; u32 max_unmap_blocks; u32 unmap_granularity; u32 unmap_alignment; u32 index; unsigned int physical_block_size; unsigned int max_medium_access_timeouts; unsigned int medium_access_timed_out; u8 media_present; u8 write_prot; u8 protection_type;/* Data Integrity Field */ u8 provisioning_mode; u8 zeroing_mode; unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ unsigned RCD : 1; /* state of disk RCD bit, unused */ unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ unsigned first_scan : 1; unsigned lbpme : 1; unsigned lbprz : 1; unsigned lbpu : 1; unsigned lbpws : 1; unsigned lbpws10 : 1; unsigned lbpvpd : 1; unsigned ws10 : 1; unsigned ws16 : 1; unsigned rc_basis: 2; unsigned zoned: 2; unsigned urswrz : 1; unsigned security : 1; unsigned ignore_medium_access_errors : 1; }; #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) static inline struct scsi_disk *scsi_disk(struct gendisk *disk) { return container_of(disk->private_data, struct scsi_disk, driver); } #define sd_printk(prefix, sdsk, fmt, a...) \ (sdsk)->disk ? \ sdev_prefix_printk(prefix, (sdsk)->device, \ (sdsk)->disk->disk_name, fmt, ##a) : \ sdev_printk(prefix, (sdsk)->device, fmt, ##a) #define sd_first_printk(prefix, sdsk, fmt, a...) \ do { \ if ((sdsk)->first_scan) \ sd_printk(prefix, sdsk, fmt, ##a); \ } while (0) static inline int scsi_medium_access_command(struct scsi_cmnd *scmd) { switch (scmd->cmnd[0]) { case READ_6: case READ_10: case READ_12: case READ_16: case SYNCHRONIZE_CACHE: case VERIFY: case VERIFY_12: case VERIFY_16: case WRITE_6: case WRITE_10: case WRITE_12: case WRITE_16: case WRITE_SAME: case WRITE_SAME_16: case UNMAP: return 1; case VARIABLE_LENGTH_CMD: switch (scmd->cmnd[9]) { case READ_32: case VERIFY_32: case WRITE_32: case WRITE_SAME_32: return 1; } } return 0; } static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blocks) { return blocks << (ilog2(sdev->sector_size) - 9); } static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks) { return blocks * sdev->sector_size; } static inline sector_t bytes_to_logical(struct scsi_device *sdev, unsigned int bytes) { return bytes >> ilog2(sdev->sector_size); } static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sector) { return sector >> (ilog2(sdev->sector_size) - 9); } #ifdef CONFIG_BLK_DEV_INTEGRITY extern void sd_dif_config_host(struct scsi_disk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void sd_dif_config_host(struct scsi_disk *disk) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ static inline int sd_is_zoned(struct scsi_disk *sdkp) { return sdkp->zoned == 1 || sdkp->device->type == TYPE_ZBC; } #ifdef CONFIG_BLK_DEV_ZONED void sd_zbc_release_disk(struct scsi_disk *sdkp); int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all); unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, unsigned int nr_blocks); #else /* CONFIG_BLK_DEV_ZONED */ static inline void sd_zbc_release_disk(struct scsi_disk *sdkp) {} static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { return 0; } static inline int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) { return 0; } static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all) { return BLK_STS_TARGET; } static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr) { return good_bytes; } static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, unsigned int nr_blocks) { return BLK_STS_TARGET; } #define sd_zbc_report_zones NULL #endif /* CONFIG_BLK_DEV_ZONED */ void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr); void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result); #endif /* _SCSI_DISK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* request_key authorisation token key type * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_REQUEST_KEY_AUTH_TYPE_H #define _KEYS_REQUEST_KEY_AUTH_TYPE_H #include <linux/key.h> /* * Authorisation record for request_key(). */ struct request_key_auth { struct rcu_head rcu; struct key *target_key; struct key *dest_keyring; const struct cred *cred; void *callout_info; size_t callout_len; pid_t pid; char op[8]; } __randomize_layout; static inline struct request_key_auth *get_request_key_auth(const struct key *key) { return key->payload.data[0]; } #endif /* _KEYS_REQUEST_KEY_AUTH_TYPE_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_CPUMASK_H #define __LINUX_CPUMASK_H /* * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/atomic.h> #include <linux/bug.h> /* Don't assign or return these: may not be this big! */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** * cpumask_bits - get the bits in a cpumask * @maskp: the struct cpumask * * * You should only assume nr_cpu_ids bits of this mask are valid. This is * a macro so it's const-correct. */ #define cpumask_bits(maskp) ((maskp)->bits) /** * cpumask_pr_args - printf args to output a cpumask * @maskp: cpumask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 #define nr_cpu_ids 1U #else extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif /* * The following particular system cpumasks and operations manage * possible, present, active and online cpus. * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * * The cpu_possible_mask is fixed at boot time, as the set of CPU id's * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * * If HOTPLUG is enabled, then cpu_possible_mask is forced to have * all NR_CPUS bits set, otherwise it is just the set of CPUs that * ACPI reports present at boot. * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. * * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. */ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) extern atomic_t __num_online_cpus; #if NR_CPUS > 1 /** * num_online_cpus() - Read the number of online CPUs * * Despite the fact that __num_online_cpus is of type atomic_t, this * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. */ static inline unsigned int num_online_cpus(void) { return atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) #define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) #define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) #define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) #else #define num_online_cpus() 1U #define num_possible_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) #define cpu_active(cpu) ((cpu) == 0) #endif extern cpumask_t cpus_booted_once_mask; static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ } /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; } #if NR_CPUS == 1 /* Uniprocessor. Assume all masks are "1". */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { return 0; } static inline unsigned int cpumask_last(const struct cpumask *srcp) { return 0; } /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { return n+1; } static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { return n+1; } static inline unsigned int cpumask_next_and(int n, const struct cpumask *srcp, const struct cpumask *andp) { return n+1; } static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */ return (wrap && n == 0); } /* cpu must be a valid cpu, ie 0, so there's no other choice. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { return 1; } static inline unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } static inline int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { return cpumask_next_and(-1, src1p, src2p); } #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_wrap(cpu, mask, start) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) #define for_each_cpu_and(cpu, mask1, mask2) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2) #else /** * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * * Returns >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * * Returns >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); } unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask * @n: the cpu prior to the place to search (ie. return will be > @n) * @srcp: the cpumask pointer * * Returns >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); /** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) /** * for_each_cpu_not - iterate over every cpu in a complemented mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_not(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask poiter * @start: the start location * * The implementation does not assume any bit in @mask is set (including @start). * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_wrap(cpu, mask, start) \ for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ (cpu) < nr_cpumask_bits; \ (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator * @mask1: the first cpumask pointer * @mask2: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; * cpumask_and(&tmp, &mask1, &mask2); * for_each_cpu(cpu, &tmp) * ... * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ (cpu) < nr_cpu_ids;) #endif /* SMP */ #define CPU_BITS_NONE \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } #define CPU_BITS_CPU0 \ { \ [0] = 1UL \ } /** * cpumask_set_cpu - set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in @cpumask, else returns 0 */ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } /** * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 * * test_and_set_bit wrapper for cpumasks. */ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_test_and_clear_cpu - atomically test and clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 * * test_and_clear_bit wrapper for cpumasks. */ static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_setall(struct cpumask *dstp) { bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_and - *dstp = *src1p & *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * If *@dstp is empty, returns 0, else returns 1 */ static inline int cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_or - *dstp = *src1p | *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_xor - *dstp = *src1p ^ *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_andnot - *dstp = *src1p & ~*src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * If *@dstp is empty, returns 0, else returns 1 */ static inline int cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_complement - *dstp = ~*srcp * @dstp: the cpumask result * @srcp: the input to invert */ static inline void cpumask_complement(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_or_equal - *src1p | *src2p == *src3p * @src1p: the first input * @src2p: the second input * @src3p: the third input */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), nr_cpumask_bits); } /** * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_subset - (*src1p & ~*src2p) == 0 * @src1p: the first input * @src2p: the second input * * Returns 1 if *@src1p is a subset of *@src2p, else returns 0 */ static inline int cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. */ static inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. */ static inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); } /** * cpumask_shift_left - *dstp = *srcp << n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); } /** * cpumask_copy - *dstp = *srcp * @dstp: the result * @srcp: the input cpumask */ static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * * Returns >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) /** * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 * @src1p: the first input * @src2p: the second input * * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ #define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask * @mask2: the second input cpumask * * Returns >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) /** * cpumask_of - the cpumask containing just a given cpu * @cpu: the cpu (<= nr_cpu_ids) */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parselist_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parse - extract a cpumask from a string * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpulist_parse - extract a cpumask from a user string of ranges * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_size - size to allocate for a 'struct cpumask' in bytes */ static inline unsigned int cpumask_size(void) { return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); } /* * cpumask_var_t: struct cpumask for stack usage. * * Oh, the wicked games we play! In order to make kernel coding a * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * * ie. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * ... use 'tmpmask' like a normal struct cpumask * ... * * free_cpumask_var(tmpmask); * * * However, one notable exception is there. alloc_cpumask_var() allocates * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. * * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * var = *tmpmask; * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. * * Note that there is another evil here: If you define a cpumask_var_t * as a percpu variable then the way to obtain the address of the cpumask * structure differently influences what this_cpu_* operation needs to be * used. Please use this_cpu_cpumask_var_t in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. * * Please also note that __cpumask_var_read_mostly can be used to declare * a cpumask_var_t variable itself (not its content) as read mostly. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) #define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); static inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } #else typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } static inline void free_cpumask_var(cpumask_var_t mask) { } static inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } static inline bool cpumask_available(cpumask_var_t mask) { return true; } #endif /* CONFIG_CPUMASK_OFFSTACK */ /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define cpu_all_mask to_cpumask(cpu_all_bits) /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); static inline void reset_cpu_possible_mask(void) { bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); } static inline void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) cpumask_set_cpu(cpu, &__cpu_possible_mask); else cpumask_clear_cpu(cpu, &__cpu_possible_mask); } static inline void set_cpu_present(unsigned int cpu, bool present) { if (present) cpumask_set_cpu(cpu, &__cpu_present_mask); else cpumask_clear_cpu(cpu, &__cpu_present_mask); } void set_cpu_online(unsigned int cpu, bool online); static inline void set_cpu_active(unsigned int cpu, bool active) { if (active) cpumask_set_cpu(cpu, &__cpu_active_mask); else cpumask_clear_cpu(cpu, &__cpu_active_mask); } /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and * static cpumasks must be used (eg. very early boot), yet we don't * expose the definition of 'struct cpumask'. * * This does the conversion, and can be used as a constant initializer. */ #define to_cpumask(bitmap) \ ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) static inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } /* * Special-case data structure for "single bit set only" constant CPU masks. * * We pre-generate all the 64 (or 32) possible bit positions, with enough * padding to the left and the right, and return the constant pointer * appropriately offset. */ extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; static inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return to_cpumask(p); } #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG #define CPU_BITS_ALL \ { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #endif /* NR_CPUS > BITS_PER_LONG */ /** * cpumap_print_to_pagebuf - copies the cpumask into the buffer either * as comma-separated list of cpus or hex values of cpumask * @list: indicates whether the cpumap must be list * @mask: the cpumask to copy * @buf: the buffer to copy into * * Returns the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), nr_cpu_ids); } #if NR_CPUS <= BITS_PER_LONG #define CPU_MASK_ALL \ (cpumask_t) { { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #else #define CPU_MASK_ALL \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #endif /* NR_CPUS > BITS_PER_LONG */ #define CPU_MASK_NONE \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } #define CPU_MASK_CPU0 \ (cpumask_t) { { \ [0] = 1UL \ } } #endif /* __LINUX_CPUMASK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2020 ARM Ltd. */ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H #ifndef __ASSEMBLY__ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static __always_inline void rep_nop(void) { asm volatile("rep; nop" ::: "memory"); } static __always_inline void cpu_relax(void) { rep_nop(); } #endif /* __ASSEMBLY__ */ #endif /* __ASM_VDSO_PROCESSOR_H */
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 fragment reassembly for connection tracking * * Copyright (C)2004 USAGI/WIDE Project * * Author: * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> * * Based on: net/ipv6/reassembly.c */ #define pr_fmt(fmt) "IPv6-nf: " fmt #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/jiffies.h> #include <linux/net.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/slab.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6_frag.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/inet_ecn.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <linux/sysctl.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/kernel.h> #include <linux/module.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> static const char nf_frags_cache_name[] = "nf-frags"; static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "nf_conntrack_frag6_high_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { } }; static int nf_ct_frag6_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = nf_ct_frag6_sysctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; } table[0].data = &net->nf_frag.fqdir->timeout; table[1].data = &net->nf_frag.fqdir->low_thresh; table[1].extra2 = &net->nf_frag.fqdir->high_thresh; table[2].data = &net->nf_frag.fqdir->high_thresh; table[2].extra1 = &net->nf_frag.fqdir->low_thresh; table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh; hdr = register_net_sysctl(net, "net/netfilter", table); if (hdr == NULL) goto err_reg; net->nf_frag_frags_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { struct ctl_table *table; table = net->nf_frag_frags_hdr->ctl_table_arg; unregister_net_sysctl_table(net->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else static int nf_ct_frag6_sysctl_register(struct net *net) { return 0; } static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { } #endif static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev); static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } static void nf_ct_frag6_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; fq = container_of(frag, struct frag_queue, q); ip6frag_expire_frag_queue(fq->q.fqdir->net, fq); } /* Creation primitives. */ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, const struct ipv6hdr *hdr, int iif) { struct frag_v6_compare_key key = { .id = id, .saddr = hdr->saddr, .daddr = hdr->daddr, .user = user, .iif = iif, }; struct inet_frag_queue *q; q = inet_frag_find(net->nf_frag.fqdir, &key); if (!q) return NULL; return container_of(q, struct frag_queue, q); } static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { unsigned int payload_len; struct net_device *dev; struct sk_buff *prev; int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); goto err; } payload_len = ntohs(ipv6_hdr(skb)->payload_len); offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (payload_len - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { pr_debug("offset is too large.\n"); return -EINVAL; } ecn = ip6_frag_ecn(ipv6_hdr(skb)); if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, csum_partial(nh, (u8 *)(fhdr + 1) - nh, 0)); } /* Is this the final fragment? */ if (!(fhdr->frag_off & htons(IP6_MF))) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. */ if (end & 0x7) { /* RFC2460 says always send parameter problem in * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); inet_frag_kill(&fq->q); return -EPROTO; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } fq->q.len = end; } } if (end == offset) goto err; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { pr_debug("queue: message is too short.\n"); goto err; } if (pskb_trim_rcsum(skb, end - offset)) { pr_debug("Can't trim\n"); goto err; } /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); prev = fq->q.fragments_tail; err = inet_frag_queue_insert(&fq->q, skb, offset, end); if (err) { if (err == IPFRAG_DUP) { /* No error for duplicates, pretend they got queued. */ kfree_skb(skb); return -EINPROGRESS; } goto insert_error; } if (dev) fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; add_frag_mem_limit(fq->q.fqdir, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; fq->q.flags |= INET_FRAG_FIRST_IN; } if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; err = nf_ct_frag6_reasm(fq, skb, prev, dev); skb->_skb_refdst = orefdst; /* After queue has assumed skb ownership, only 0 or * -EINPROGRESS must be returned. */ return err ? -EINPROGRESS : 0; } skb_dst_drop(skb); return -EINPROGRESS; insert_error: inet_frag_kill(&fq->q); err: skb_dst_drop(skb); return -EINVAL; } /* * Check if this packet is complete. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) { void *reasm_data; int payload_len; u8 ecn; inet_frag_kill(&fq->q); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) goto err; reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) goto err; payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; memmove(skb->head + sizeof(struct frag_hdr), skb->head, (skb->data - skb->head) - sizeof(struct frag_hdr)); skb->mac_header += sizeof(struct frag_hdr); skb->network_header += sizeof(struct frag_hdr); skb_reset_transport_header(skb); inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->ignore_df = 1; skb->dev = dev; ipv6_hdr(skb)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_partial(skb_network_header(skb), skb_network_header_len(skb), skb->csum); fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; return 0; err: inet_frag_kill(&fq->q); return -EINVAL; } /* * find the header just before Fragment Header. * * if success return 0 and set ... * (*prevhdrp): the value of "Next Header Field" in the header * just before Fragment Header. * (*prevhoff): the offset of "Next Header Field" in the header * just before Fragment Header. * (*fhoff) : the offset of Fragment Header. * * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c * */ static int find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) { u8 nexthdr = ipv6_hdr(skb)->nexthdr; const int netoff = skb_network_offset(skb); u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr); int start = netoff + sizeof(struct ipv6hdr); int len = skb->len - start; u8 prevhdr = NEXTHDR_IPV6; while (nexthdr != NEXTHDR_FRAGMENT) { struct ipv6_opt_hdr hdr; int hdrlen; if (!ipv6_ext_hdr(nexthdr)) { return -1; } if (nexthdr == NEXTHDR_NONE) { pr_debug("next header is none\n"); return -1; } if (len < (int)sizeof(struct ipv6_opt_hdr)) { pr_debug("too short\n"); return -1; } if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) BUG(); if (nexthdr == NEXTHDR_AUTH) hdrlen = ipv6_authlen(&hdr); else hdrlen = ipv6_optlen(&hdr); prevhdr = nexthdr; prev_nhoff = start; nexthdr = hdr.nexthdr; len -= hdrlen; start += hdrlen; } if (len < 0) return -1; *prevhdrp = prevhdr; *prevhoff = prev_nhoff; *fhoff = start; return 0; } int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; u8 nexthdr = NEXTHDR_FRAGMENT; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; u8 prevhdr; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); return 0; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return 0; /* Discard the first fragment if it does not include all headers * RFC 8200, Section 4.5 */ if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) { pr_debug("Drop incomplete fragment\n"); return 0; } if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return -ENOMEM; skb_set_transport_header(skb, fhoff); hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); return -ENOMEM; } spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); if (ret == -EPROTO) { skb->transport_header = savethdr; ret = 0; } spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); static int nf_ct_net_init(struct net *net) { int res; res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net); if (res < 0) return res; net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT; res = nf_ct_frag6_sysctl_register(net); if (res < 0) fqdir_exit(net->nf_frag.fqdir); return res; } static void nf_ct_net_pre_exit(struct net *net) { fqdir_pre_exit(net->nf_frag.fqdir); } static void nf_ct_net_exit(struct net *net) { nf_ct_frags6_sysctl_unregister(net); fqdir_exit(net->nf_frag.fqdir); } static struct pernet_operations nf_ct_net_ops = { .init = nf_ct_net_init, .pre_exit = nf_ct_net_pre_exit, .exit = nf_ct_net_exit, }; static const struct rhashtable_params nfct_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), .hashfn = ip6frag_key_hashfn, .obj_hashfn = ip6frag_obj_hashfn, .obj_cmpfn = ip6frag_obj_cmpfn, .automatic_shrinking = true, }; int nf_ct_frag6_init(void) { int ret = 0; nf_frags.constructor = ip6frag_init; nf_frags.destructor = NULL; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frags_cache_name = nf_frags_cache_name; nf_frags.rhash_params = nfct_rhash_params; ret = inet_frags_init(&nf_frags); if (ret) goto out; ret = register_pernet_subsys(&nf_ct_net_ops); if (ret) inet_frags_fini(&nf_frags); out: return ret; } void nf_ct_frag6_cleanup(void) { unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); }
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 // SPDX-License-Identifier: GPL-2.0-only /* * net/core/fib_rules.c Generic Routing Rules * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <linux/indirect_call_wrapper.h> #if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES) #ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) #else #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) #endif #elif defined(CONFIG_IP_MULTIPLE_TABLES) #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) #else #define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__) #endif static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), KUIDT_INIT(~0), }; bool fib_rule_matchall(const struct fib_rule *rule) { if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) return false; if (fib_rule_port_range_set(&rule->sport_range)) return false; if (fib_rule_port_range_set(&rule->dport_range)) return false; return true; } EXPORT_SYMBOL_GPL(fib_rule_matchall); int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { struct fib_rule *r; r = kzalloc(ops->rule_size, GFP_KERNEL); if (r == NULL) return -ENOMEM; refcount_set(&r->refcnt, 1); r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; r->flags = flags; r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; /* The lock is not required here, the list in unreacheable * at the moment this function is called */ list_add_tail(&r->list, &ops->rules_list); return 0; } EXPORT_SYMBOL(fib_default_rule_add); static u32 fib_default_rule_pref(struct fib_rules_ops *ops) { struct list_head *pos; struct fib_rule *rule; if (!list_empty(&ops->rules_list)) { pos = ops->rules_list.next; if (pos->next != &ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; } } return 0; } static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; rcu_read_unlock(); return ops; } } rcu_read_unlock(); return NULL; } static void rules_ops_put(struct fib_rules_ops *ops) { if (ops) module_put(ops->owner); } static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) ops->flush_cache(ops); } static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; struct net *net; net = ops->fro_net; if (ops->rule_size < sizeof(struct fib_rule)) return -EINVAL; if (ops->match == NULL || ops->configure == NULL || ops->compare == NULL || ops->fill == NULL || ops->action == NULL) return -EINVAL; spin_lock(&net->rules_mod_lock); list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: spin_unlock(&net->rules_mod_lock); return err; } struct fib_rules_ops * fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ops->rules_list); ops->fro_net = net; err = __fib_rules_register(ops); if (err) { kfree(ops); ops = ERR_PTR(err); } return ops; } EXPORT_SYMBOL_GPL(fib_rules_register); static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); if (ops->delete) ops->delete(rule); fib_rule_put(rule); } } void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; spin_lock(&net->rules_mod_lock); list_del_rcu(&ops->list); spin_unlock(&net->rules_mod_lock); fib_rules_cleanup_ops(ops); kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); static int uid_range_set(struct fib_kuid_range *range) { return uid_valid(range->start) && uid_valid(range->end); } static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) { struct fib_rule_uid_range *in; struct fib_kuid_range out; in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); out.start = make_kuid(current_user_ns(), in->start); out.end = make_kuid(current_user_ns(), in->end); return out; } static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) { struct fib_rule_uid_range out = { from_kuid_munged(current_user_ns(), range->start), from_kuid_munged(current_user_ns(), range->end) }; return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } static int nla_get_port_range(struct nlattr *pattr, struct fib_rule_port_range *port_range) { const struct fib_rule_port_range *pr = nla_data(pattr); if (!fib_rule_port_range_valid(pr)) return -EINVAL; port_range->start = pr->start; port_range->end = pr->end; return 0; } static int nla_put_port_range(struct sk_buff *skb, int attrtype, struct fib_rule_port_range *range) { return nla_put(skb, attrtype, sizeof(*range), range); } static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { int ret = 0; if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; if (uid_lt(fl->flowi_uid, rule->uid_range.start) || uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; ret = INDIRECT_CALL_MT(ops->match, fib6_rule_match, fib4_rule_match, rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; } int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { struct fib_rule *rule; int err; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags, arg)) continue; if (rule->action == FR_ACT_GOTO) { struct fib_rule *target; target = rcu_dereference(rule->ctarget); if (target == NULL) { continue; } else { rule = target; goto jumped; } } else if (rule->action == FR_ACT_NOP) continue; else err = INDIRECT_CALL_MT(ops->action, fib6_rule_action, fib4_rule_action, rule, fl, flags, arg); if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, fib6_rule_suppress, fib4_rule_suppress, rule, flags, arg)) continue; if (err != -EAGAIN) { if ((arg->flags & FIB_LOOKUP_NOREF) || likely(refcount_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } break; } } err = -ESRCH; out: rcu_read_unlock(); return err; } EXPORT_SYMBOL_GPL(fib_rules_lookup); static int call_fib_rule_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_rule *rule, int family, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = family, .info.extack = extack, .rule = rule, }; return call_fib_notifier(nb, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule, struct fib_rules_ops *ops, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = ops->family, .info.extack = extack, .rule = rule, }; ops->fib_rules_seq++; return call_fib_notifiers(net, event_type, &info.info); } /* Called with rcu_read_lock() */ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack) { struct fib_rules_ops *ops; struct fib_rule *rule; int err = 0; ops = lookup_rules_ops(net, family); if (!ops) return -EAFNOSUPPORT; list_for_each_entry_rcu(rule, &ops->rules_list, list) { err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD, rule, family, extack); if (err) break; } rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_rules_dump); unsigned int fib_rules_seq_read(struct net *net, int family) { unsigned int fib_rules_seq; struct fib_rules_ops *ops; ASSERT_RTNL(); ops = lookup_rules_ops(net, family); if (!ops) return 0; fib_rules_seq = ops->fib_rules_seq; rules_ops_put(ops); return fib_rules_seq; } EXPORT_SYMBOL_GPL(fib_rules_seq_read); static struct fib_rule *rule_find(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule, bool user_priority) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { if (rule->action && r->action != rule->action) continue; if (rule->table && r->table != rule->table) continue; if (user_priority && r->pref != rule->pref) continue; if (rule->iifname[0] && memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; if (rule->oifname[0] && memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; if (rule->mark && r->mark != rule->mark) continue; if (rule->suppress_ifgroup != -1 && r->suppress_ifgroup != rule->suppress_ifgroup) continue; if (rule->suppress_prefixlen != -1 && r->suppress_prefixlen != rule->suppress_prefixlen) continue; if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; if (rule->tun_id && r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; if (uid_range_set(&rule->uid_range) && (!uid_eq(r->uid_range.start, rule->uid_range.start) || !uid_eq(r->uid_range.end, rule->uid_range.end))) continue; if (rule->ip_proto && r->ip_proto != rule->ip_proto) continue; if (rule->proto && r->proto != rule->proto) continue; if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; return r; } return NULL; } #ifdef CONFIG_NET_L3_MASTER_DEV static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { nlrule->l3mdev = nla_get_u8(nla); if (nlrule->l3mdev != 1) { NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute"); return -1; } return 0; } #else static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel"); return -1; } #endif static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; if (frh->src_len) if (!tb[FRA_SRC] || frh->src_len > (ops->addr_size * 8) || nla_len(tb[FRA_SRC]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid source address"); goto errout; } if (frh->dst_len) if (!tb[FRA_DST] || frh->dst_len > (ops->addr_size * 8) || nla_len(tb[FRA_DST]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid dst address"); goto errout; } nlrule = kzalloc(ops->rule_size, GFP_KERNEL); if (!nlrule) { err = -ENOMEM; goto errout; } refcount_set(&nlrule->refcnt, 1); nlrule->fr_net = net; if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; } else { nlrule->pref = fib_default_rule_pref(ops); } nlrule->proto = tb[FRA_PROTOCOL] ? nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; if (tb[FRA_IIFNAME]) { struct net_device *dev; nlrule->iifindex = -1; nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->iifname); if (dev) nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { struct net_device *dev; nlrule->oifindex = -1; nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->oifname); if (dev) nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { nlrule->mark = nla_get_u32(tb[FRA_FWMARK]); if (nlrule->mark) /* compatibility: if the mark value is non-zero all bits * are compared unless a mask is explicitly specified. */ nlrule->mark_mask = 0xFFFFFFFF; } if (tb[FRA_FWMASK]) nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); if (tb[FRA_TUN_ID]) nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); err = -EINVAL; if (tb[FRA_L3MDEV] && fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0) goto errout_free; nlrule->action = frh->action; nlrule->flags = frh->flags; nlrule->table = frh_get_table(frh, tb); if (tb[FRA_SUPPRESS_PREFIXLEN]) nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); else nlrule->suppress_prefixlen = -1; if (tb[FRA_SUPPRESS_IFGROUP]) nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); else nlrule->suppress_ifgroup = -1; if (tb[FRA_GOTO]) { if (nlrule->action != FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Unexpected goto"); goto errout_free; } nlrule->target = nla_get_u32(tb[FRA_GOTO]); /* Backward jumps are prohibited to avoid endless loops */ if (nlrule->target <= nlrule->pref) { NL_SET_ERR_MSG(extack, "Backward goto not supported"); goto errout_free; } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; } if (nlrule->l3mdev && nlrule->table) { NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive"); goto errout_free; } if (tb[FRA_UID_RANGE]) { if (current_user_ns() != net->user_ns) { err = -EPERM; NL_SET_ERR_MSG(extack, "No permission to set uid"); goto errout_free; } nlrule->uid_range = nla_get_kuid_range(tb); if (!uid_range_set(&nlrule->uid_range) || !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) { NL_SET_ERR_MSG(extack, "Invalid uid range"); goto errout_free; } } else { nlrule->uid_range = fib_kuid_range_unset; } if (tb[FRA_IP_PROTO]) nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); if (tb[FRA_SPORT_RANGE]) { err = nla_get_port_range(tb[FRA_SPORT_RANGE], &nlrule->sport_range); if (err) { NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; } } if (tb[FRA_DPORT_RANGE]) { err = nla_get_port_range(tb[FRA_DPORT_RANGE], &nlrule->dport_range); if (err) { NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; } } *rule = nlrule; return 0; errout_free: kfree(nlrule); errout: return err; } static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { if (r->action != rule->action) continue; if (r->table != rule->table) continue; if (r->pref != rule->pref) continue; if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; if (r->mark != rule->mark) continue; if (r->suppress_ifgroup != rule->suppress_ifgroup) continue; if (r->suppress_prefixlen != rule->suppress_prefixlen) continue; if (r->mark_mask != rule->mark_mask) continue; if (r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; if (r->l3mdev != rule->l3mdev) continue; if (!uid_eq(r->uid_range.start, rule->uid_range.start) || !uid_eq(r->uid_range.end, rule->uid_range.end)) continue; if (r->ip_proto != rule->ip_proto) continue; if (r->proto != rule->proto) continue; if (!fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; if (!fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; return 1; } return 0; } int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } ops = lookup_rules_ops(net, frh->family); if (!ops) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; } err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); if (err) goto errout; if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; goto errout_free; } err = ops->configure(rule, skb, frh, tb, extack); if (err < 0) goto errout_free; err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); if (err < 0) goto errout_free; list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { RCU_INIT_POINTER(rule->ctarget, r); break; } } if (rcu_dereference_protected(rule->ctarget, 1) == NULL) unresolved = 1; list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; } if (last) list_add_rcu(&rule->list, &last->list); else list_add_rcu(&rule->list, &ops->rules_list); if (ops->unresolved_rules) { /* * There are unresolved goto rules in the list, check if * any of them are pointing to this new rule. */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref && rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; } } } if (rule->action == FR_ACT_GOTO) ops->nr_goto_rules++; if (unresolved) ops->unresolved_rules++; if (rule->tun_id) ip_tunnel_need_metadata(); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: kfree(rule); errout: rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_nl_newrule); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; } err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); if (err) goto errout; rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; goto errout; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; goto errout; } if (ops->delete) { err = ops->delete(rule); if (err) goto errout; } if (rule->tun_id) ip_tunnel_unneed_metadata(); list_del_rcu(&rule->list); if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; if (rtnl_dereference(rule->ctarget) == NULL) ops->unresolved_rules--; } /* * Check if this rule is a target to any of them. If so, * adjust to the next one with the same preference or * disable them. As this operation is eventually very * expensive, it is only performed if goto rules, except * current if it is goto rule, have actually been added. */ if (ops->nr_goto_rules > 0) { struct fib_rule *n; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) n = NULL; list_for_each_entry(r, &ops->rules_list, list) { if (rtnl_dereference(r->ctarget) != rule) continue; rcu_assign_pointer(r->ctarget, n); if (!n) ops->unresolved_rules++; } } call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); kfree(nlrule); return 0; errout: kfree(nlrule); rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_nl_delrule); static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */ + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ + nla_total_size(sizeof(struct fib_kuid_range)) + nla_total_size(1) /* FRA_PROTOCOL */ + nla_total_size(1) /* FRA_IP_PROTO */ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); return payload; } static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, u32 pid, u32 seq, int type, int flags, struct fib_rules_ops *ops) { struct nlmsghdr *nlh; struct fib_rule_hdr *frh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); if (nlh == NULL) return -EMSGSIZE; frh = nlmsg_data(nlh); frh->family = ops->family; frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) goto nla_put_failure; frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; frh->flags = rule->flags; if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto)) goto nla_put_failure; if (rule->action == FR_ACT_GOTO && rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) goto nla_put_failure; if (rule->iifindex == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) goto nla_put_failure; if (rule->oifindex == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } if ((rule->pref && nla_put_u32(skb, FRA_PRIORITY, rule->pref)) || (rule->mark && nla_put_u32(skb, FRA_FWMARK, rule->mark)) || ((rule->mark_mask || rule->mark) && nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) || (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || (uid_range_set(&rule->uid_range) && nla_put_uid_range(skb, &rule->uid_range)) || (fib_rule_port_range_set(&rule->sport_range) && nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || (fib_rule_port_range_set(&rule->dport_range) && nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup)) goto nla_put_failure; } if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, struct fib_rules_ops *ops) { int idx = 0; struct fib_rule *rule; int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops); if (err) break; skip: idx++; } rcu_read_unlock(); cb->args[1] = idx; rules_ops_put(ops); return err; } static int fib_valid_dumprule_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct fib_rule_hdr *frh; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request"); return -EINVAL; } frh = nlmsg_data(nlh); if (frh->dst_len || frh->src_len || frh->tos || frh->table || frh->res1 || frh->res2 || frh->action || frh->flags) { NL_SET_ERR_MSG(extack, "Invalid values in header for fib rule dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request"); return -EINVAL; } return 0; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct fib_rules_ops *ops; int idx = 0, family; if (cb->strict_check) { int err = fib_valid_dumprule_req(nlh, cb->extack); if (err < 0) return err; } family = rtnl_msg_family(nlh); if (family != AF_UNSPEC) { /* Protocol specific dump request */ ops = lookup_rules_ops(net, family); if (ops == NULL) return -EAFNOSUPPORT; dump_rules(skb, cb, ops); return skb->len; } rcu_read_lock(); list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (idx < cb->args[0] || !try_module_get(ops->owner)) goto skip; if (dump_rules(skb, cb, ops) < 0) break; cb->args[1] = 0; skip: idx++; } rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid) { struct net *net; struct sk_buff *skb; int err = -ENOMEM; net = ops->fro_net; skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); if (err < 0) { /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) rule->iifindex = dev->ifindex; if (rule->oifindex == -1 && strcmp(dev->name, rule->oifname) == 0) rule->oifindex = dev->ifindex; } } static void detach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) rule->iifindex = -1; if (rule->oifindex == dev->ifindex) rule->oifindex = -1; } } static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_CHANGENAME: list_for_each_entry(ops, &net->rules_ops, list) { detach_rules(&ops->rules_list, dev); attach_rules(&ops->rules_list, dev); } break; case NETDEV_UNREGISTER: list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; } return NOTIFY_DONE; } static struct notifier_block fib_rules_notifier = { .notifier_call = fib_rules_event, }; static int __net_init fib_rules_net_init(struct net *net) { INIT_LIST_HEAD(&net->rules_ops); spin_lock_init(&net->rules_mod_lock); return 0; } static void __net_exit fib_rules_net_exit(struct net *net) { WARN_ON_ONCE(!list_empty(&net->rules_ops)); } static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, }; static int __init fib_rules_init(void) { int err; rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) goto fail; err = register_netdevice_notifier(&fib_rules_notifier); if (err < 0) goto fail_unregister; return 0; fail_unregister: unregister_pernet_subsys(&fib_rules_net_ops); fail: rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); rtnl_unregister(PF_UNSPEC, RTM_DELRULE); rtnl_unregister(PF_UNSPEC, RTM_GETRULE); return err; } subsys_initcall(fib_rules_init);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/buffer_head.h * * Everything to do with buffer_heads. */ #ifndef _LINUX_BUFFER_HEAD_H #define _LINUX_BUFFER_HEAD_H #include <linux/types.h> #include <linux/fs.h> #include <linux/linkage.h> #include <linux/pagemap.h> #include <linux/wait.h> #include <linux/atomic.h> #ifdef CONFIG_BLOCK enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ BH_Async_Read, /* Is under end_buffer_async_read I/O */ BH_Async_Write, /* Is under end_buffer_async_write I/O */ BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities */ }; #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) struct page; struct buffer_head; struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* * Historically, a buffer_head was used to map a single block * within a page, and of course as the unit of I/O through the * filesystem and block layers. Nowadays the basic I/O unit * is the bio, and buffer_heads are used for extracting block * mappings (via a get_block_t call), for tracking state within * a page (via a page_mapping) and for wrapping bio submission * for backward compatibility reasons (e.g. submit_bh). */ struct buffer_head { unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ struct page *b_page; /* the page this bh is mapped to */ sector_t b_blocknr; /* start block number */ size_t b_size; /* size of mapping */ char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to * serialise IO completion of other * buffers in the page */ }; /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. * To avoid reset buffer flags that are already set, because that causes * a costly cache line transition, check the flag first. */ #define BUFFER_FNS(bit, name) \ static __always_inline void set_buffer_##name(struct buffer_head *bh) \ { \ if (!test_bit(BH_##bit, &(bh)->b_state)) \ set_bit(BH_##bit, &(bh)->b_state); \ } \ static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ { \ clear_bit(BH_##bit, &(bh)->b_state); \ } \ static __always_inline int buffer_##name(const struct buffer_head *bh) \ { \ return test_bit(BH_##bit, &(bh)->b_state); \ } /* * test_set_buffer_foo() and test_clear_buffer_foo() */ #define TAS_BUFFER_FNS(bit, name) \ static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \ { \ return test_and_set_bit(BH_##bit, &(bh)->b_state); \ } \ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ { \ return test_and_clear_bit(BH_##bit, &(bh)->b_state); \ } \ /* * Emit the buffer bitops functions. Note that there are also functions * of the form "mark_buffer_foo()". These are higher-level functions which * do something in addition to setting a b_state bit. */ BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) BUFFER_FNS(Req, req) TAS_BUFFER_FNS(Req, req) BUFFER_FNS(Mapped, mapped) BUFFER_FNS(New, new) BUFFER_FNS(Async_Read, async_read) BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) BUFFER_FNS(Meta, meta) BUFFER_FNS(Prio, prio) BUFFER_FNS(Defer_Completion, defer_completion) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ ({ \ BUG_ON(!PagePrivate(page)); \ ((struct buffer_head *)page_private(page)); \ }) #define page_has_buffers(page) PagePrivate(page) void buffer_check_dirty_writeback(struct page *page, bool *dirty, bool *writeback); /* * Declarations */ void mark_buffer_dirty(struct buffer_head *bh); void mark_buffer_write_io_error(struct buffer_head *bh); void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) { clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1); } void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); void write_dirty_buffer(struct buffer_head *bh, int op_flags); int submit_bh(int, int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); int bh_submit_read(struct buffer_head *bh); extern int buffer_heads_over_limit; /* * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ void block_invalidatepage(struct page *page, unsigned int offset, unsigned int length); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); /* Convert errno to return value from ->page_mkwrite() call */ static inline vm_fault_t block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); int nobh_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); void buffer_init(void); /* * inline definitions */ static inline void get_bh(struct buffer_head *bh) { atomic_inc(&bh->b_count); } static inline void put_bh(struct buffer_head *bh) { smp_mb__before_atomic(); atomic_dec(&bh->b_count); } static inline void brelse(struct buffer_head *bh) { if (bh) __brelse(bh); } static inline void bforget(struct buffer_head *bh) { if (bh) __bforget(bh); } static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * sb_bread_unmovable(struct super_block *sb, sector_t block) { return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void sb_breadahead(struct super_block *sb, sector_t block) { __breadahead(sb->s_bdev, block, sb->s_blocksize); } static inline void sb_breadahead_unmovable(struct super_block *sb, sector_t block) { __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) { return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { set_buffer_mapped(bh); bh->b_bdev = sb->s_bdev; bh->b_blocknr = block; bh->b_size = sb->s_blocksize; } static inline void wait_on_buffer(struct buffer_head *bh) { might_sleep(); if (buffer_locked(bh)) __wait_on_buffer(bh); } static inline int trylock_buffer(struct buffer_head *bh) { return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state)); } static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); if (!trylock_buffer(bh)) __lock_buffer(bh); } static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, sector_t block, unsigned size) { return __getblk_gfp(bdev, block, size, 0); } static inline struct buffer_head *__getblk(struct block_device *bdev, sector_t block, unsigned size) { return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); } /** * __bread() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read * * Reads a specified block, and returns buffer head that contains it. * The page cache is allocated from movable area so that it can be migrated. * It returns NULL if the block was unreadable. */ static inline struct buffer_head * __bread(struct block_device *bdev, sector_t block, unsigned size) { return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ static inline void buffer_init(void) {} static inline int try_to_free_buffers(struct page *page) { return 1; } static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } #define buffer_heads_over_limit 0 #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_ERR_H #define _LINUX_ERR_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/errno.h> /* * Kernel pointers have redundant information, so we can use a * scheme where we can return either an error code or a normal * pointer with the same return value. * * This should be a per-architecture thing, to allow different * error and pointer decisions. */ #define MAX_ERRNO 4095 #ifndef __ASSEMBLY__ #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) { return (void *) error; } static inline long __must_check PTR_ERR(__force const void *ptr) { return (long) ptr; } static inline bool __must_check IS_ERR(__force const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) { return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); } /** * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type * @ptr: The pointer to cast. * * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ static inline void * __must_check ERR_CAST(__force const void *ptr) { /* cast away the const */ return (void *) ptr; } static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) { if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; } #endif #endif /* _LINUX_ERR_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CGROUP_H #define _LINUX_CGROUP_H /* * cgroup interface * * Copyright (C) 2003 BULL SA * Copyright (C) 2004-2006 Silicon Graphics, Inc. * */ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/kernfs.h> #include <linux/jump_label.h> #include <linux/types.h> #include <linux/ns_common.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/kernel_stat.h> #include <linux/cgroup-defs.h> struct kernel_clone_args; #ifdef CONFIG_CGROUPS /* * All weight knobs on the default hierarhcy should use the following min, * default and max values. The default value is the logarithmic center of * MIN and MAX and allows 100x to be expressed in both directions. */ #define CGROUP_WEIGHT_MIN 1 #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 /* walk only threadgroup leaders */ #define CSS_TASK_ITER_PROCS (1U << 0) /* walk all threaded css_sets in the domain */ #define CSS_TASK_ITER_THREADED (1U << 1) /* internal flags */ #define CSS_TASK_ITER_SKIPPED (1U << 16) /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; unsigned int flags; struct list_head *cset_pos; struct list_head *cset_head; struct list_head *tcset_pos; struct list_head *tcset_head; struct list_head *task_pos; struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> #undef SUBSYS #define SUBSYS(_x) \ extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; #include <linux/cgroup_subsys.h> #undef SUBSYS /** * cgroup_subsys_enabled - fast test on whether a subsys is enabled * @ss: subsystem in question */ #define cgroup_subsys_enabled(ss) \ static_branch_likely(&ss ## _enabled_key) /** * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy * @ss: subsystem in question */ #define cgroup_subsys_on_dfl(ss) \ static_branch_likely(&ss ## _on_dfl_key) bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); extern int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs); void cgroup_exit(struct task_struct *p); void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v); /* * Iteration helpers and macros. */ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent); struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it); struct task_struct *css_task_iter_next(struct css_task_iter *it); void css_task_iter_end(struct css_task_iter *it); /** * css_for_each_child - iterate through children of a css * @pos: the css * to use as the loop cursor * @parent: css whose children to walk * * Walk @parent's children. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_child(pos, parent) \ for ((pos) = css_next_child(NULL, (parent)); (pos); \ (pos) = css_next_child((pos), (parent))) /** * css_for_each_descendant_pre - pre-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @root: css whose descendants to walk * * Walk @root's descendants. @root is included in the iteration and the * first node to be visited. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * For example, the following guarantees that a descendant can't escape * state updates of its ancestors. * * my_online(@css) * { * Lock @css's parent and @css; * Inherit state from the parent; * Unlock both. * } * * my_update_state(@css) * { * css_for_each_descendant_pre(@pos, @css) { * Lock @pos; * if (@pos == @css) * Update @css's state; * else * Verify @pos is alive and inherit state from its parent; * Unlock @pos; * } * } * * As long as the inheriting step, including checking the parent state, is * enclosed inside @pos locking, double-locking the parent isn't necessary * while inheriting. The state update to the parent is guaranteed to be * visible by walking order and, as long as inheriting operations to the * same @pos are atomic to each other, multiple updates racing each other * still result in the correct state. It's guaranateed that at least one * inheritance happens for any css after the latest update to its parent. * * If checking parent's state requires locking the parent, each inheriting * iteration should lock and unlock both @pos->parent and @pos. * * Alternatively, a subsystem may choose to use a single global lock to * synchronize ->css_online() and ->css_offline() against tree-walking * operations. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_descendant_pre(pos, css) \ for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ (pos) = css_next_descendant_pre((pos), (css))) /** * css_for_each_descendant_post - post-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @css: css whose descendants to walk * * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. @root is included in the iteration and the last * node to be visited. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * Note that the walk visibility guarantee example described in pre-order * walk doesn't apply the same to post-order walks. */ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple * processes. * * On the v2 hierarchy, there may be tasks from multiple processes and they * may not share the source or destination csses. * * On traditional hierarchies, when there are multiple tasks in @tset, if a * task of a process is in @tset, all tasks of the process are in @tset. * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, dst_css, tset) \ for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ (task); \ (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. */ #define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ (leader); \ (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else /* * Inline functions. */ static inline u64 cgroup_id(struct cgroup *cgrp) { return cgrp->kn->id; } /** * css_get - obtain a reference on the specified css * @css: target css * * The caller must already have a reference. */ static inline void css_get(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) percpu_ref_get(&css->refcnt); } /** * css_get_many - obtain references on the specified css * @css: target css * @n: number of references to get * * The caller must already have a reference. */ static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) { if (!(css->flags & CSS_NO_REF)) percpu_ref_get_many(&css->refcnt, n); } /** * css_tryget - try to obtain a reference on the specified css * @css: target css * * Obtain a reference on @css unless it already has reached zero and is * being released. This function doesn't care whether @css is on or * offline. The caller naturally needs to ensure that @css is accessible * but doesn't have to be holding a reference on it - IOW, RCU protected * access is good enough for this function. Returns %true if a reference * count was successfully obtained; %false otherwise. */ static inline bool css_tryget(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) return percpu_ref_tryget(&css->refcnt); return true; } /** * css_tryget_online - try to obtain a reference on the specified css if online * @css: target css * * Obtain a reference on @css if it's online. The caller naturally needs * to ensure that @css is accessible but doesn't have to be holding a * reference on it - IOW, RCU protected access is good enough for this * function. Returns %true if a reference count was successfully obtained; * %false otherwise. */ static inline bool css_tryget_online(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) return percpu_ref_tryget_live(&css->refcnt); return true; } /** * css_is_dying - test whether the specified css is dying * @css: target css * * Test whether @css is in the process of offlining or already offline. In * most cases, ->css_online() and ->css_offline() callbacks should be * enough; however, the actual offline operations are RCU delayed and this * test returns %true also when @css is scheduled to be offlined. * * This is useful, for example, when the use case requires synchronous * behavior with respect to cgroup removal. cgroup removal schedules css * offlining but the css can seem alive while the operation is being * delayed. If the delay affects user visible semantics, this test can be * used to resolve the situation. */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } /** * css_put - put a css reference * @css: target css * * Put a reference obtained via css_get() and css_tryget_online(). */ static inline void css_put(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) percpu_ref_put(&css->refcnt); } /** * css_put_many - put css references * @css: target css * @n: number of references to put * * Put references obtained via css_get() and css_tryget_online(). */ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) { if (!(css->flags & CSS_NO_REF)) percpu_ref_put_many(&css->refcnt, n); } static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); } static inline bool cgroup_tryget(struct cgroup *cgrp) { return css_tryget(&cgrp->self); } static inline void cgroup_put(struct cgroup *cgrp) { css_put(&cgrp->self); } /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for * @__c: extra condition expression to be passed to rcu_dereference_check() * * A task's css_set is RCU protected, initialized and exited while holding * task_lock(), and can only be modified while holding both cgroup_mutex * and task_lock() while the task is alive. This macro verifies that the * caller is inside proper critical section and returns @task's css_set. * * The caller can also specify additional allowed conditions via @__c, such * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ rcu_dereference((task)->cgroups) #endif /** * task_css_check - obtain css for (task, subsys) w/ extra access conds * @task: the target task * @subsys_id: the target subsystem ID * @__c: extra condition expression to be passed to rcu_dereference_check() * * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The * synchronization rules are the same as task_css_set_check(). */ #define task_css_check(task, subsys_id, __c) \ task_css_set_check((task), (__c))->subsys[(subsys_id)] /** * task_css_set - obtain a task's css_set * @task: the task to obtain css_set for * * See task_css_set_check(). */ static inline struct css_set *task_css_set(struct task_struct *task) { return task_css_set_check(task, false); } /** * task_css - obtain css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * See task_css_check(). */ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, false); } /** * task_get_css - find and get the css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * Find the css for the (@task, @subsys_id) combination, increment a * reference on and return it. This function is guaranteed to return a * valid css. The returned css may already have been offlined. */ static inline struct cgroup_subsys_state * task_get_css(struct task_struct *task, int subsys_id) { struct cgroup_subsys_state *css; rcu_read_lock(); while (true) { css = task_css(task, subsys_id); /* * Can't use css_tryget_online() here. A task which has * PF_EXITING set may stay associated with an offline css. * If such task calls this function, css_tryget_online() * will keep failing. */ if (likely(css_tryget(css))) break; cpu_relax(); } rcu_read_unlock(); return css; } /** * task_css_is_root - test whether a task belongs to the root css * @task: the target task * @subsys_id: the target subsystem ID * * Test whether @task belongs to the root css on the specified subsystem. * May be invoked in any context. */ static inline bool task_css_is_root(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, true) == init_css_set.subsys[subsys_id]; } static inline struct cgroup *task_cgroup(struct task_struct *task, int subsys_id) { return task_css(task, subsys_id)->cgroup; } static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) { return task_css_set(task)->dfl_cgrp; } static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { struct cgroup_subsys_state *parent_css = cgrp->self.parent; if (parent_css) return container_of(parent_css, struct cgroup, self); return NULL; } /** * cgroup_is_descendant - test ancestry * @cgrp: the cgroup to be tested * @ancestor: possible ancestor of @cgrp * * Test whether @cgrp is a descendant of @ancestor. It also returns %true * if @cgrp == @ancestor. This function is safe to call as long as @cgrp * and @ancestor are accessible. */ static inline bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); } /** * cgroup_ancestor - find ancestor of cgroup * @cgrp: cgroup to find ancestor of * @ancestor_level: level of ancestor to find starting from root * * Find ancestor of cgroup at specified level starting from root if it exists * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at * @ancestor_level. * * This function is safe to call as long as @cgrp is accessible. */ static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { if (cgrp->level < ancestor_level) return NULL; while (cgrp && cgrp->level > ancestor_level) cgrp = cgroup_parent(cgrp); return cgrp; } /** * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. * It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy. */ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { struct css_set *cset = task_css_set(task); return cgroup_is_descendant(cset->dfl_cgrp, ancestor); } /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + cgrp->nr_populated_threaded_children; } /* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { return kernfs_ino(cgrp->kn); } /* cft/css accessors for cftype->write() operation */ static inline struct cftype *of_cft(struct kernfs_open_file *of) { return of->kn->priv; } struct cgroup_subsys_state *of_css(struct kernfs_open_file *of); /* cft/css accessors for cftype->seq_*() operations */ static inline struct cftype *seq_cft(struct seq_file *seq) { return of_cft(seq->private); } static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) { return of_css(seq->private); } /* * Name / path handling functions. All are thin wrappers around the kernfs * counterparts and can be called under any context. */ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_name(cgrp->kn, buf, buflen); } static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_path(cgrp->kn, buf, buflen); } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) { pr_cont_kernfs_name(cgrp->kn); } static inline void pr_cont_cgroup_path(struct cgroup *cgrp) { pr_cont_kernfs_path(cgrp->kn); } static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { return &cgrp->psi; } static inline void cgroup_init_kthreadd(void) { /* * kthreadd is inherited by all kthreads, keep it in the root so * that the new kthreads are guaranteed to stay in the root until * initialization is finished. */ current->no_cgroup_migration = 1; } static inline void cgroup_kthread_ready(void) { /* * This kthread finished initialization. The creator should have * set PF_NO_SETAFFINITY if this kthread should stay in the root. */ current->no_cgroup_migration = 0; } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; struct cgroup; static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} static inline int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs) { return 0; } static inline void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { return NULL; } static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { return NULL; } static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { return true; } static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS /* * cgroup scalable recursive statistics. */ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); /* * Basic resource stats. */ #ifdef CONFIG_CGROUP_CPUACCT void cpuacct_charge(struct task_struct *tsk, u64 cputime); void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); #else static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} static inline void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) {} #endif void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec); static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) { struct cgroup *cgrp; cpuacct_charge(task, delta_exec); rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup *cgrp; cpuacct_account_field(task, index, delta_exec); rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) {} static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) {} #endif /* CONFIG_CGROUPS */ /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. */ #ifdef CONFIG_SOCK_CGROUP_DATA #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) extern spinlock_t cgroup_sk_update_lock; #endif void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) unsigned long v; /* * @skcd->val is 64bit but the following is safe on 32bit too as we * just need the lower ulong to be written and read atomically. */ v = READ_ONCE(skcd->val); if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; #else return (struct cgroup *)(unsigned long)skcd->val; #endif } #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; struct css_set *root_cset; }; extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } static inline struct cgroup_namespace * copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { return old_ns; } #endif /* !CONFIG_CGROUPS */ static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) refcount_inc(&ns->count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { if (ns && refcount_dec_and_test(&ns->count)) free_cgroup_ns(ns); } #ifdef CONFIG_CGROUPS void cgroup_enter_frozen(void); void cgroup_leave_frozen(bool always_leave); void cgroup_update_frozen(struct cgroup *cgrp); void cgroup_freeze(struct cgroup *cgrp, bool freeze); void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, struct cgroup *dst); static inline bool cgroup_task_freeze(struct task_struct *task) { bool ret; if (task->flags & PF_KTHREAD) return false; rcu_read_lock(); ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags); rcu_read_unlock(); return ret; } static inline bool cgroup_task_frozen(struct task_struct *task) { return task->frozen; } #else /* !CONFIG_CGROUPS */ static inline void cgroup_enter_frozen(void) { } static inline void cgroup_leave_frozen(bool always_leave) { } static inline bool cgroup_task_freeze(struct task_struct *task) { return false; } static inline bool cgroup_task_frozen(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUP_BPF static inline void cgroup_bpf_get(struct cgroup *cgrp) { percpu_ref_get(&cgrp->bpf.refcnt); } static inline void cgroup_bpf_put(struct cgroup *cgrp) { percpu_ref_put(&cgrp->bpf.refcnt); } #else /* CONFIG_CGROUP_BPF */ static inline void cgroup_bpf_get(struct cgroup *cgrp) {} static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ #endif /* _LINUX_CGROUP_H */
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 // SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- * sysctl_net.c: sysctl interface to net subsystem. * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net directories for each protocol family. [MS] * * Revision 1.2 1996/05/08 20:24:40 shaver * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and * NET_IPV4_IP_FORWARD. * * */ #include <linux/mm.h> #include <linux/export.h> #include <linux/sysctl.h> #include <linux/nsproxy.h> #include <net/sock.h> #ifdef CONFIG_INET #include <net/ip.h> #endif #ifdef CONFIG_NET #include <linux/if_ether.h> #endif static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root) { return &current->nsproxy->net_ns->sysctls; } static int is_seen(struct ctl_table_set *set) { return &current->nsproxy->net_ns->sysctls == set; } /* Return standard mode bits for table entry. */ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); /* Allow network administrator to have same access as root. */ if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } return table->mode; } static void net_ctl_set_ownership(struct ctl_table_header *head, struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct net *net = container_of(head->set, struct net, sysctls); kuid_t ns_root_uid; kgid_t ns_root_gid; ns_root_uid = make_kuid(net->user_ns, 0); if (uid_valid(ns_root_uid)) *uid = ns_root_uid; ns_root_gid = make_kgid(net->user_ns, 0); if (gid_valid(ns_root_gid)) *gid = ns_root_gid; } static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) { setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); return 0; } static void __net_exit sysctl_net_exit(struct net *net) { retire_sysctl_set(&net->sysctls); } static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, }; static struct ctl_table_header *net_header; __init int net_sysctl_init(void) { static struct ctl_table empty[1]; int ret = -ENOMEM; /* Avoid limitations in the sysctl implementation by * registering "/proc/sys/net" as an empty directory not in a * network namespace. */ net_header = register_sysctl("net", empty); if (!net_header) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out1; out: return ret; out1: unregister_sysctl_table(net_header); net_header = NULL; goto out; } struct ctl_table_header *register_net_sysctl(struct net *net, const char *path, struct ctl_table *table) { return __register_sysctl_table(&net->sysctls, path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl); void unregister_net_sysctl_table(struct ctl_table_header *header) { unregister_sysctl_table(header); } EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_INETDEVICE_H #define _LINUX_INETDEVICE_H #ifdef __KERNEL__ #include <linux/bitmap.h> #include <linux/if.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/rcupdate.h> #include <linux/timer.h> #include <linux/sysctl.h> #include <linux/rtnetlink.h> #include <linux/refcount.h> struct ipv4_devconf { void *sysctl; int data[IPV4_DEVCONF_MAX]; DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); }; #define MC_HASH_SZ_LOG 9 struct in_device { struct net_device *dev; refcount_t refcnt; int dead; struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ struct ip_mc_list __rcu * __rcu *mc_hash; int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; unsigned long mr_v2_seen; unsigned long mr_maxdelay; unsigned long mr_qi; /* Query Interval */ unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ struct neigh_parms *arp_parms; struct ipv4_devconf cnf; struct rcu_head rcu_head; }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) static inline int ipv4_devconf_get(struct in_device *in_dev, int index) { index--; return in_dev->cnf.data[index]; } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, int val) { index--; set_bit(index, in_dev->cnf.state); in_dev->cnf.data[index] = val; } static inline void ipv4_devconf_setall(struct in_device *in_dev) { bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX); } #define IN_DEV_CONF_GET(in_dev, attr) \ ipv4_devconf_get((in_dev), IPV4_DEVCONF_ ## attr) #define IN_DEV_CONF_SET(in_dev, attr, val) \ ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ (IPV4_DEVCONF_ALL(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_BFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), BC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) #define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ SECURE_REDIRECTS) #define IN_DEV_IDTAG(in_dev) IN_DEV_CONF_GET(in_dev, TAG) #define IN_DEV_MEDIUM_ID(in_dev) IN_DEV_CONF_GET(in_dev, MEDIUM_ID) #define IN_DEV_PROMOTE_SECONDARIES(in_dev) \ IN_DEV_ORCONF((in_dev), \ PROMOTE_SECONDARIES) #define IN_DEV_ROUTE_LOCALNET(in_dev) IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET) #define IN_DEV_NET_ROUTE_LOCALNET(in_dev, net) \ IN_DEV_NET_ORCONF(in_dev, net, ROUTE_LOCALNET) #define IN_DEV_RX_REDIRECTS(in_dev) \ ((IN_DEV_FORWARD(in_dev) && \ IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \ || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) struct in_ifaddr { struct hlist_node hash; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; __be32 ifa_local; __be32 ifa_address; __be32 ifa_mask; __u32 ifa_rt_priority; __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 ifa_valid_lft; __u32 ifa_preferred_lft; unsigned long ifa_cstamp; /* created timestamp */ unsigned long ifa_tstamp; /* updated timestamp */ }; struct in_validator_info { __be32 ivi_addr; struct in_device *ivi_dev; struct netlink_ext_ack *extack; }; int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); int register_inetaddr_validator_notifier(struct notifier_block *nb); int unregister_inetaddr_validator_notifier(struct notifier_block *nb); void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf); struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { return __ip_dev_find(net, addr, true); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static inline bool inet_ifa_match(__be32 addr, const struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } /* * Check if a mask is acceptable. */ static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) return true; hmask = ntohl(mask); if (hmask & (hmask+1)) return true; return false; } #define in_dev_for_each_ifa_rtnl(ifa, in_dev) \ for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->ip_ptr); } static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) refcount_inc(&in_dev->refcnt); rcu_read_unlock(); return in_dev; } static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->ip_ptr); } /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { struct in_device *in_dev; bool rc = false; in_dev = rcu_dereference_rtnl(dev->ip_ptr); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rc = true; return rc; } static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); return in_dev ? in_dev->arp_parms : NULL; } void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { if (refcount_dec_and_test(&idev->refcnt)) in_dev_finish_destroy(idev); } #define __in_dev_put(idev) refcount_dec(&(idev)->refcnt) #define in_dev_hold(idev) refcount_inc(&(idev)->refcnt) #endif /* __KERNEL__ */ static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) return htonl(~((1U<<(32-logmask))-1)); return 0; } static __inline__ int inet_mask_len(__be32 mask) { __u32 hmask = ntohl(mask); if (!hmask) return 0; return 32 - ffz(~hmask); } #endif /* _LINUX_INETDEVICE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Red Black Trees (C) 1999 Andrea Arcangeli <andrea@suse.de> (C) 2002 David Woodhouse <dwmw2@infradead.org> (C) 2012 Michel Lespinasse <walken@google.com> linux/include/linux/rbtree_augmented.h */ #ifndef _LINUX_RBTREE_AUGMENTED_H #define _LINUX_RBTREE_AUGMENTED_H #include <linux/compiler.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> /* * Please note - only struct rb_augment_callbacks and the prototypes for * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * * See Documentation/core-api/rbtree.rst for documentation and samples. */ struct rb_augment_callbacks { void (*propagate)(struct rb_node *node, struct rb_node *stop); void (*copy)(struct rb_node *old, struct rb_node *new); void (*rotate)(struct rb_node *old, struct rb_node *new); }; extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. * * On insertion, the user must update the augmented information on the path * leading to the inserted node, then call rb_link_node() as usual and * rb_insert_augmented() instead of the usual rb_insert_color() call. * If rb_insert_augmented() rebalances the rbtree, it will callback into * a user provided function to update the augmented information on the * affected subtrees. */ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { __rb_insert_augmented(node, root, augment->rotate); } static inline void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *root, bool newleft, const struct rb_augment_callbacks *augment) { if (newleft) root->rb_leftmost = node; rb_insert_augmented(node, &root->rb_root, augment); } /* * Template for declaring augmented rbtree callbacks (generic case) * * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that recomputes the RBAUGMENTED data */ #define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \ static inline void \ RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop) \ { \ while (rb != stop) { \ RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD); \ if (RBCOMPUTE(node, true)) \ break; \ rb = rb_parent(&node->RBFIELD); \ } \ } \ static inline void \ RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ } \ static void \ RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ RBCOMPUTE(old, false); \ } \ RBSTATIC const struct rb_augment_callbacks RBNAME = { \ .propagate = RBNAME ## _propagate, \ .copy = RBNAME ## _copy, \ .rotate = RBNAME ## _rotate \ }; /* * Template for declaring augmented rbtree callbacks, * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes. * * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBTYPE: type of the RBAUGMENTED field * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that returns the per-node RBTYPE scalar */ #define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ RBTYPE, RBAUGMENTED, RBCOMPUTE) \ static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit) \ { \ RBSTRUCT *child; \ RBTYPE max = RBCOMPUTE(node); \ if (node->RBFIELD.rb_left) { \ child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (node->RBFIELD.rb_right) { \ child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (exit && node->RBAUGMENTED == max) \ return true; \ node->RBAUGMENTED = max; \ return false; \ } \ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max) #define RB_RED 0 #define RB_BLACK 1 #define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) #define __rb_color(pc) ((pc) & 1) #define __rb_is_black(pc) __rb_color(pc) #define __rb_is_red(pc) (!__rb_color(pc)) #define rb_color(rb) __rb_color((rb)->__rb_parent_color) #define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) #define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { rb->__rb_parent_color = (unsigned long)p | color; } static inline void __rb_change_child(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) WRITE_ONCE(parent->rb_left, new); else WRITE_ONCE(parent->rb_right, new); } else WRITE_ONCE(root->rb_node, new); } static inline void __rb_change_child_rcu(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) rcu_assign_pointer(parent->rb_left, new); else rcu_assign_pointer(parent->rb_right, new); } else rcu_assign_pointer(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) * * Note that if there is one child it must be red due to 5) * and node must be black due to 4). We adjust colors locally * so as to bypass __rb_erase_color() later on. */ pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, child, parent, root); if (child) { child->__rb_parent_color = pc; rebalance = NULL; } else rebalance = __rb_is_black(pc) ? parent : NULL; tmp = parent; } else if (!child) { /* Still case 1, but this time the child is node->rb_left */ tmp->__rb_parent_color = pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, tmp, parent, root); rebalance = NULL; tmp = parent; } else { struct rb_node *successor = child, *child2; tmp = child->rb_left; if (!tmp) { /* * Case 2: node's successor is its right child * * (n) (s) * / \ / \ * (x) (s) -> (x) (c) * \ * (c) */ parent = successor; child2 = successor->rb_right; augment->copy(node, successor); } else { /* * Case 3: node's successor is leftmost under * node's right child subtree * * (n) (s) * / \ / \ * (x) (y) -> (x) (y) * / / * (p) (p) * / / * (s) (c) * \ * (c) */ do { parent = successor; successor = tmp; tmp = tmp->rb_left; } while (tmp); child2 = successor->rb_right; WRITE_ONCE(parent->rb_left, child2); WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); augment->copy(node, successor); augment->propagate(parent, successor); } tmp = node->rb_left; WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); if (child2) { rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { rebalance = rb_is_black(successor) ? parent : NULL; } successor->__rb_parent_color = pc; tmp = successor; } augment->propagate(tmp, NULL); return rebalance; } static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } static __always_inline void rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, const struct rb_augment_callbacks *augment) { if (root->rb_leftmost == node) root->rb_leftmost = rb_next(node); rb_erase_augmented(node, &root->rb_root, augment); } #endif /* _LINUX_RBTREE_AUGMENTED_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IVERSION_H #define _LINUX_IVERSION_H #include <linux/fs.h> /* * The inode->i_version field: * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear different to observers if there was a change to the inode's data or * metadata since it was last queried. * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the * inode. If it's different then something has changed. Observers cannot infer * anything about the nature or magnitude of the changes from the value, only * that the inode has changed in some fashion. * * Not all filesystems properly implement the i_version counter. Subsystems that * want to use i_version field on an inode should first check whether the * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). * * Those that set SB_I_VERSION will automatically have their i_version counter * incremented on writes to normal files. If the SB_I_VERSION is not set, then * the VFS will not touch it on writes, and the filesystem can use it how it * wishes. Note that the filesystem is always responsible for updating the * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). * We consider these sorts of filesystems to have a kernel-managed i_version. * * It may be impractical for filesystems to keep i_version updates atomic with * respect to the changes that cause them. They should, however, guarantee * that i_version updates are never visible before the changes that caused * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * * This implementation uses the low bit in the i_version field as a flag to * track when the value has been queried. If it has not been queried since it * was last incremented, we can skip the increment in most cases. * * In the event that we're updating the ctime, we will usually go ahead and * bump the i_version anyway. Since that has to go to stable storage in some * fashion, we might as well increment it as well. * * With this implementation, the value should always appear to observers to * increase over time if the file has changed. It's recommended to use * inode_eq_iversion() helper to compare values. * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. * * Persistently storing the i_version * ---------------------------------- * Queries of the i_version field are not gated on them hitting the backing * store. It's always possible that the host could crash after allowing * a query of the value but before it has made it to disk. * * To mitigate this problem, filesystems should always use * inode_set_iversion_queried when loading an existing inode from disk. This * ensures that the next attempted inode increment will result in the value * changing. * * Storing the value to disk therefore does not count as a query, so those * filesystems should use inode_peek_iversion to grab the value to be stored. * There is no need to flag the value as having been queried in that case. */ /* * We borrow the lowest bit in the i_version to use as a flag to tell whether * it has been queried since we last incremented it. If it has, then we must * increment it on the next change. After that, we can clear the flag and * avoid incrementing it again until it has again been queried. */ #define I_VERSION_QUERIED_SHIFT (1) #define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) #define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void inode_set_iversion_raw(struct inode *inode, u64 val) { atomic64_set(&inode->i_version, val); } /** * inode_peek_iversion_raw - grab a "raw" iversion value * @inode: inode from which i_version should be read * * Grab a "raw" inode->i_version value and return it. The i_version is not * flagged or converted in any way. This is mostly used to access a self-managed * i_version. * * With those filesystems, we want to treat the i_version as an entirely * opaque value. */ static inline u64 inode_peek_iversion_raw(const struct inode *inode) { return atomic64_read(&inode->i_version); } /** * inode_set_max_iversion_raw - update i_version new value is larger * @inode: inode to set * @val: new i_version to set * * Some self-managed filesystems (e.g Ceph) will only update the i_version * value if the new value is larger than the one we already have. */ static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { u64 cur, old; cur = inode_peek_iversion_raw(inode); for (;;) { if (cur > val) break; old = atomic64_cmpxchg(&inode->i_version, cur, val); if (likely(old == cur)) break; cur = old; } } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for filesystems with * a kernel-managed i_version, for initializing a newly-created inode from * scratch. * * In this case, we do not set the QUERIED flag since we know that this value * has never been queried. */ static inline void inode_set_iversion(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val, and flag it for increment on the next * change. * * Filesystems that persistently store the i_version on disk should use this * when loading an existing inode from disk. * * When loading in an i_version value from a backing store, we can't be certain * that it wasn't previously viewed before being stored. Thus, we must assume * that it was, to ensure that we don't end up handing out the same value for * different versions of the same inode. */ static inline void inode_set_iversion_queried(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | I_VERSION_QUERIED); } /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * * If "force" is set or the QUERIED flag is set, then ensure that we increment * the value, and clear the queried flag. * * In the common case where neither is set, then we can return "false" without * updating i_version. * * If this function returns false, and no other metadata has changed, then we * can avoid logging the metadata. */ static inline bool inode_maybe_inc_iversion(struct inode *inode, bool force) { u64 cur, old, new; /* * The i_version field is not strictly ordered with any other inode * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * * Here, we add full memory barriers to ensure that any de-facto * ordering with other info is preserved. * * This barrier pairs with the barrier in inode_query_iversion() */ smp_mb(); cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) return false; /* Since lowest bit is flag, add 2 to avoid it */ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return true; } /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated * * Forcbily increment the i_version field. This always results in a change to * the observable value. */ static inline void inode_inc_iversion(struct inode *inode) { inode_maybe_inc_iversion(inode, true); } /** * inode_iversion_need_inc - is the i_version in need of being incremented? * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** * inode_inc_iversion_raw - forcibly increment raw i_version * @inode: inode that needs to be updated * * Forcbily increment the raw i_version field. This always results in a change * to the raw value. * * NFS will use the i_version field to store the value from the server. It * mostly treats it as opaque, but in the case where it holds a write * delegation, it must increment the value itself. This function does that. */ static inline void inode_inc_iversion_raw(struct inode *inode) { atomic64_inc(&inode->i_version); } /** * inode_peek_iversion - read i_version without flagging it to be incremented * @inode: inode from which i_version should be read * * Read the inode i_version counter for an inode without registering it as a * query. * * This is typically used by local filesystems that need to store an i_version * on disk. In that situation, it's not necessary to flag it as having been * viewed, as the result won't be used to gauge changes from that point. */ static inline u64 inode_peek_iversion(const struct inode *inode) { return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /** * inode_query_iversion - read i_version for later use * @inode: inode from which i_version should be read * * Read the inode i_version counter. This should be used by callers that wish * to store the returned i_version for later comparison. This will guarantee * that a later query of the i_version will result in a different value if * anything has changed. * * In this implementation, we fetch the current value, set the QUERIED flag and * then try to swap it into place with a cmpxchg, if it wasn't already set. If * that fails, we try again with the newly fetched value from the cmpxchg. */ static inline u64 inode_query_iversion(struct inode *inode) { u64 cur, old, new; cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* * This barrier (and the implicit barrier in the * cmpxchg below) pairs with the barrier in * inode_maybe_inc_iversion(). */ smp_mb(); break; } new = cur | I_VERSION_QUERIED; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return cur >> I_VERSION_QUERIED_SHIFT; } /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare the current raw i_version counter with a previous one. Returns true * if they are the same or false if they are different. */ static inline bool inode_eq_iversion_raw(const struct inode *inode, u64 old) { return inode_peek_iversion_raw(inode) == old; } /** * inode_eq_iversion - check whether the i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns true if they are * the same, and false if they are different. * * Note that we don't need to set the QUERIED flag in this case, as the value * in the inode is not being recorded for later use. */ static inline bool inode_eq_iversion(const struct inode *inode, u64 old) { return inode_peek_iversion(inode) == old; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 /* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions and Declarations for tuple. * * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> * - generalize L3 protocol dependent part. * * Derived from include/linux/netfiter_ipv4/ip_conntrack_tuple.h */ #ifndef _NF_CONNTRACK_TUPLE_H #define _NF_CONNTRACK_TUPLE_H #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <linux/list_nulls.h> /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. We divide the structure along "manipulatable" and "non-manipulatable" lines, for the benefit of the NAT code. */ #define NF_CT_TUPLE_L3SIZE ARRAY_SIZE(((union nf_inet_addr *)NULL)->all) /* The manipulable part of the tuple. */ struct nf_conntrack_man { union nf_inet_addr u3; union nf_conntrack_man_proto u; /* Layer 3 protocol */ u_int16_t l3num; }; /* This contains the information to distinguish a connection. */ struct nf_conntrack_tuple { struct nf_conntrack_man src; /* These are the parts of the tuple which are fixed. */ struct { union nf_inet_addr u3; union { /* Add other protocols here. */ __be16 all; struct { __be16 port; } tcp; struct { __be16 port; } udp; struct { u_int8_t type, code; } icmp; struct { __be16 port; } dccp; struct { __be16 port; } sctp; struct { __be16 key; } gre; } u; /* The protocol. */ u_int8_t protonum; /* The direction (for tuplehash) */ u_int8_t dir; } dst; }; struct nf_conntrack_tuple_mask { struct { union nf_inet_addr u3; union nf_conntrack_man_proto u; } src; }; static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) { #ifdef DEBUG printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n", t, t->dst.protonum, &t->src.u3.ip, ntohs(t->src.u.all), &t->dst.u3.ip, ntohs(t->dst.u.all)); #endif } static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", t, t->dst.protonum, t->src.u3.all, ntohs(t->src.u.all), t->dst.u3.all, ntohs(t->dst.u.all)); #endif } static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t) { switch (t->src.l3num) { case AF_INET: nf_ct_dump_tuple_ip(t); break; case AF_INET6: nf_ct_dump_tuple_ipv6(t); break; } } /* If we're the first tuple, it's the original dir. */ #define NF_CT_DIRECTION(h) \ ((enum ip_conntrack_dir)(h)->tuple.dst.dir) /* Connections have two entries in the hash table: one for each way */ struct nf_conntrack_tuple_hash { struct hlist_nulls_node hnnode; struct nf_conntrack_tuple tuple; }; static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) && t1->src.u.all == t2->src.u.all && t1->src.l3num == t2->src.l3num); } static inline bool __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { return (nf_inet_addr_cmp(&t1->dst.u3, &t2->dst.u3) && t1->dst.u.all == t2->dst.u.all && t1->dst.protonum == t2->dst.protonum); } static inline bool nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { return __nf_ct_tuple_src_equal(t1, t2) && __nf_ct_tuple_dst_equal(t1, t2); } static inline bool nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, const struct nf_conntrack_tuple_mask *m2) { return (nf_inet_addr_cmp(&m1->src.u3, &m2->src.u3) && m1->src.u.all == m2->src.u.all); } static inline bool nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2, const struct nf_conntrack_tuple_mask *mask) { int count; for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) { if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) & mask->src.u3.all[count]) return false; } if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all) return false; if (t1->src.l3num != t2->src.l3num || t1->dst.protonum != t2->dst.protonum) return false; return true; } static inline bool nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple_mask *mask) { return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && __nf_ct_tuple_dst_equal(t, tuple); } #endif /* _NF_CONNTRACK_TUPLE_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/core.c - core driver model code (device registration, etc) * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2006 Novell, Inc. */ #include <linux/acpi.h> #include <linux/cpufreq.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/kdev_t.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/genhd.h> #include <linux/mutex.h> #include <linux/pm_runtime.h> #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/sysfs.h> #include "base.h" #include "power/power.h" #ifdef CONFIG_SYSFS_DEPRECATED #ifdef CONFIG_SYSFS_DEPRECATED_V2 long sysfs_deprecated = 1; #else long sysfs_deprecated = 0; #endif static int __init sysfs_deprecated_setup(char *arg) { return kstrtol(arg, 10, &sysfs_deprecated); } early_param("sysfs.deprecated", sysfs_deprecated_setup); #endif /* Device links support. */ static LIST_HEAD(wait_for_suppliers); static DEFINE_MUTEX(wfs_lock); static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static unsigned int defer_fw_devlink_count; static LIST_HEAD(deferred_fw_devlink); static DEFINE_MUTEX(defer_fw_devlink_lock); static bool fw_devlink_is_permissive(void); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); static inline void device_links_write_lock(void) { mutex_lock(&device_links_lock); } static inline void device_links_write_unlock(void) { mutex_unlock(&device_links_lock); } int device_links_read_lock(void) __acquires(&device_links_srcu) { return srcu_read_lock(&device_links_srcu); } void device_links_read_unlock(int idx) __releases(&device_links_srcu) { srcu_read_unlock(&device_links_srcu, idx); } int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); static inline void device_links_write_lock(void) { down_write(&device_links_lock); } static inline void device_links_write_unlock(void) { up_write(&device_links_lock); } int device_links_read_lock(void) { down_read(&device_links_lock); return 0; } void device_links_read_unlock(int not_used) { up_read(&device_links_lock); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int device_links_read_lock_held(void) { return lockdep_is_held(&device_links_lock); } #endif static inline void device_link_synchronize_removal(void) { } #endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) { while (target->parent) { target = target->parent; if (dev == target) return true; } return false; } /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. * @target: Device to check against. * * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; /* * The "ancestors" check is needed to catch the case when the target * device has not been completely initialized yet and it is still * missing from the list of children of its parent device. */ if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); if (ret) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; if (link->consumer == target) return 1; ret = device_is_dependent(link->consumer, target); if (ret) break; } return ret; } static void device_link_init_status(struct device_link *link, struct device *consumer, struct device *supplier) { switch (supplier->links.status) { case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* * A consumer driver can create a link to a supplier * that has not completed its probing yet as long as it * knows that the supplier is already functional (for * example, it has just acquired some resources from the * supplier). */ link->status = DL_STATE_CONSUMER_PROBE; break; default: link->status = DL_STATE_DORMANT; break; } break; case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: link->status = DL_STATE_ACTIVE; break; default: link->status = DL_STATE_AVAILABLE; break; } break; case DL_DEV_UNBINDING: link->status = DL_STATE_SUPPLIER_UNBIND; break; default: link->status = DL_STATE_DORMANT; break; } } static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; /* * Devices that have not been registered yet will be put to the ends * of the lists during the registration, so skip them here. */ if (device_is_registered(dev)) devices_kset_move_last(dev); if (device_pm_initialized(dev)) device_pm_move_last(dev); device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; device_reorder_to_tail(link->consumer, NULL); } return 0; } /** * device_pm_move_to_tail - Move set of devices to the end of device lists * @dev: Device to move * * This is a device_reorder_to_tail() wrapper taking the requisite locks. * * It moves the @dev along with all of its children and all of its consumers * to the ends of the device_kset and dpm_list, recursively. */ void device_pm_move_to_tail(struct device *dev) { int idx; idx = device_links_read_lock(); device_pm_lock(); device_reorder_to_tail(dev, NULL); device_pm_unlock(); device_links_read_unlock(idx); } #define to_devlink(dev) container_of((dev), struct device_link, link_dev) static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; switch (to_devlink(dev)->status) { case DL_STATE_NONE: output = "not tracked"; break; case DL_STATE_DORMANT: output = "dormant"; break; case DL_STATE_AVAILABLE: output = "available"; break; case DL_STATE_CONSUMER_PROBE: output = "consumer probing"; break; case DL_STATE_ACTIVE: output = "active"; break; case DL_STATE_SUPPLIER_UNBIND: output = "supplier unbinding"; break; default: output = "unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(status); static ssize_t auto_remove_on_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); const char *output; if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) output = "supplier unbind"; else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) output = "consumer unbind"; else output = "never"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(auto_remove_on); static ssize_t runtime_pm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); static ssize_t sync_state_only_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); static struct attribute *devlink_attrs[] = { &dev_attr_status.attr, &dev_attr_auto_remove_on.attr, &dev_attr_runtime_pm.attr, &dev_attr_sync_state_only.attr, NULL, }; ATTRIBUTE_GROUPS(devlink); static void device_link_release_fn(struct work_struct *work) { struct device_link *link = container_of(work, struct device_link, rm_work); /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); put_device(link->consumer); put_device(link->supplier); kfree(link); } static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); INIT_WORK(&link->rm_work, device_link_release_fn); /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the "long" * workqueue. */ queue_work(system_long_wq, &link->rm_work); } static struct class devlink_class = { .name = "devlink", .owner = THIS_MODULE, .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; static int devlink_add_symlinks(struct device *dev, struct class_interface *class_intf) { int ret; size_t len; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) goto out; ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer"); if (ret) goto err_con; snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); if (ret) goto err_con_dev; snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); if (ret) goto err_sup_dev; goto out; err_sup_dev: snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: kfree(buf); return ret; } static void devlink_remove_symlinks(struct device *dev, struct class_interface *class_intf) { struct device_link *link = to_devlink(dev); size_t len; struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { WARN(1, "Unable to properly free device link symlinks!\n"); return; } if (device_is_registered(con)) { snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); sysfs_remove_link(&con->kobj, buf); } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); } static struct class_interface devlink_class_intf = { .class = &devlink_class, .add_dev = devlink_add_symlinks, .remove_dev = devlink_remove_symlinks, }; static int __init devlink_class_init(void) { int ret; ret = class_register(&devlink_class); if (ret) return ret; ret = class_interface_register(&devlink_class_intf); if (ret) class_unregister(&devlink_class); return ret; } postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @flags: Link flags. * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will * be forced into the active metastate and reference-counted upon the creation * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * * If DL_FLAG_STATELESS is set in @flags, the caller of this function is * expected to release the link returned by it directly with the help of either * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value * can only be used to check whether or not the link is present. In that case, * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link * flags can be used to indicate to the driver core when the link can be safely * deleted. Namely, setting one of them in @flags indicates to the driver core * that the link is not going to be used (by the given caller of this function) * after unbinding the consumer or supplier driver, respectively, from its * device, so the link can be deleted at that point. If none of them is set, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can * be used to request the driver core to automaticall probe for a consmer * driver after successfully binding a driver to the supplier device. * * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at * the same time is invalid and will cause NULL to be returned upfront. * However, if a device link between the given @consumer and @supplier pair * exists already when this function is called for them, the existing link will * be returned regardless of its current type and status (the link's flags may * be modified then). The caller of this function is then expected to treat * the link as though it has just been created, so (in particular) if * DL_FLAG_STATELESS was passed in @flags, the link needs to be released * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). * * The supplier device is required to be registered when this function is called * and NULL will be returned if that is not the case. The consumer device need * not be registered, however. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; if (!consumer || !supplier || consumer == supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_SYNC_STATE_ONLY && flags != DL_FLAG_SYNC_STATE_ONLY) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { if (pm_runtime_get_sync(supplier) < 0) { pm_runtime_put_noidle(supplier); return NULL; } } if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; device_links_write_lock(); device_pm_lock(); /* * If the supplier has not been fully registered yet or there is a * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and * the supplier already in the graph, return NULL. If the link is a * SYNC_STATE_ONLY link, we don't check for reverse dependencies * because it only affects sync_state() callbacks. */ if (!device_pm_initialized(supplier) || (!(flags & DL_FLAG_SYNC_STATE_ONLY) && device_is_dependent(consumer, supplier))) { link = NULL; goto out; } /* * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); } if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(link->flags & DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { link->flags |= DL_FLAG_STATELESS; goto out; } } /* * If the life time of the link following from the new flags is * longer than indicated by the flags of the existing link, * update the existing link to stay around longer. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } if (!(link->flags & DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; } goto out; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; refcount_set(&link->rpm_active, 1); get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); get_device(consumer); link->consumer = consumer; INIT_LIST_HEAD(&link->c_node); link->flags = flags; kref_init(&link->kref); link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); dev_set_name(&link->link_dev, "%s:%s--%s:%s", dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(&link->link_dev); link = NULL; goto out; } if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); pm_runtime_new_link(consumer); } /* Determine the initial link state. */ if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; else device_link_init_status(link, consumer, supplier); /* * Some callers expect the link creation during consumer driver probe to * resume the supplier even without DL_FLAG_RPM_ACTIVE. */ if (link->status == DL_STATE_CONSUMER_PROBE && flags & DL_FLAG_PM_RUNTIME) pm_runtime_resume(supplier); list_add_tail_rcu(&link->s_node, &supplier->links.consumers); list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); if (flags & DL_FLAG_SYNC_STATE_ONLY) { dev_dbg(consumer, "Linked as a sync state only consumer to %s\n", dev_name(supplier)); goto out; } reorder: /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * * It is necessary to hold dpm_list locked throughout all that or else * we may end up suspending with a wrong ordering of it. */ device_reorder_to_tail(consumer, NULL); dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); out: device_pm_unlock(); device_links_write_unlock(); if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; } EXPORT_SYMBOL_GPL(device_link_add); /** * device_link_wait_for_supplier - Add device to wait_for_suppliers list * @consumer: Consumer device * * Marks the @consumer device as waiting for suppliers to become available by * adding it to the wait_for_suppliers list. The consumer device will never be * probed until it's removed from the wait_for_suppliers list. * * The caller is responsible for adding the links to the supplier devices once * they are available and removing the @consumer device from the * wait_for_suppliers list once links to all the suppliers have been created. * * This function is NOT meant to be called from the probe function of the * consumer but rather from code that creates/adds the consumer device. */ static void device_link_wait_for_supplier(struct device *consumer, bool need_for_probe) { mutex_lock(&wfs_lock); list_add_tail(&consumer->links.needs_suppliers, &wait_for_suppliers); consumer->links.need_for_probe = need_for_probe; mutex_unlock(&wfs_lock); } static void device_link_wait_for_mandatory_supplier(struct device *consumer) { device_link_wait_for_supplier(consumer, true); } static void device_link_wait_for_optional_supplier(struct device *consumer) { device_link_wait_for_supplier(consumer, false); } /** * device_link_add_missing_supplier_links - Add links from consumer devices to * supplier devices, leaving any * consumer with inactive suppliers on * the wait_for_suppliers list * * Loops through all consumers waiting on suppliers and tries to add all their * supplier links. If that succeeds, the consumer device is removed from * wait_for_suppliers list. Otherwise, they are left in the wait_for_suppliers * list. Devices left on the wait_for_suppliers list will not be probed. * * The fwnode add_links callback is expected to return 0 if it has found and * added all the supplier links for the consumer device. It should return an * error if it isn't able to do so. * * The caller of device_link_wait_for_supplier() is expected to call this once * it's aware of potential suppliers becoming available. */ static void device_link_add_missing_supplier_links(void) { struct device *dev, *tmp; mutex_lock(&wfs_lock); list_for_each_entry_safe(dev, tmp, &wait_for_suppliers, links.needs_suppliers) { int ret = fwnode_call_int_op(dev->fwnode, add_links, dev); if (!ret) list_del_init(&dev->links.needs_suppliers); else if (ret != -ENODEV || fw_devlink_is_permissive()) dev->links.need_for_probe = false; } mutex_unlock(&wfs_lock); } #ifdef CONFIG_SRCU static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); device_unregister(&link->link_dev); } #else /* !CONFIG_SRCU */ static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_info(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); list_del(&link->s_node); list_del(&link->c_node); device_unregister(&link->link_dev); } #endif /* !CONFIG_SRCU */ static void device_link_put_kref(struct device_link *link) { if (link->flags & DL_FLAG_STATELESS) kref_put(&link->kref, __device_link_del); else WARN(1, "Unable to drop a managed device link reference\n"); } /** * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime * PM. If the link was added multiple times, it needs to be deleted as often. * Care is required for hotplugged devices: Their links are purged on removal * and calling device_link_del() is then no longer allowed. */ void device_link_del(struct device_link *link) { device_links_write_lock(); device_link_put_kref(link); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); /** * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * * The caller must ensure proper synchronization of this function with runtime * PM. */ void device_link_remove(void *consumer, struct device *supplier) { struct device_link *link; if (WARN_ON(consumer == supplier)) return; device_links_write_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { device_link_put_kref(link); break; } } device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); static void device_links_missing_supplier(struct device *dev) { struct device_link *link; list_for_each_entry(link, &dev->links.suppliers, c_node) { if (link->status != DL_STATE_CONSUMER_PROBE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. * * Check links from this device to any suppliers. Walk the list of the device's * links to suppliers and see if all of them are available. If not, simply * return -EPROBE_DEFER. * * We need to guarantee that the supplier will not go away after the check has * been positive here. It only can go away in __device_release_driver() and * that function checks the device's links to consumers. This means we need to * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { struct device_link *link; int ret = 0; /* * Device waiting for supplier to become available is not allowed to * probe. */ mutex_lock(&wfs_lock); if (!list_empty(&dev->links.needs_suppliers) && dev->links.need_for_probe) { mutex_unlock(&wfs_lock); return -EPROBE_DEFER; } mutex_unlock(&wfs_lock); device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { device_links_missing_supplier(dev); ret = -EPROBE_DEFER; break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); return ret; } /** * __device_links_queue_sync_state - Queue a device for sync_state() callback * @dev: Device to call sync_state() on * @list: List head to queue the @dev on * * Queues a device for a sync_state() callback when the device links write lock * isn't held. This allows the sync_state() execution flow to use device links * APIs. The caller must ensure this function is called with * device_links_write_lock() held. * * This function does a get_device() to make sure the device is not freed while * on this list. * * So the caller must also ensure that device_links_flush_sync_list() is called * as soon as the caller releases device_links_write_lock(). This is necessary * to make sure the sync_state() is called in a timely fashion and the * put_device() is called on this device. */ static void __device_links_queue_sync_state(struct device *dev, struct list_head *list) { struct device_link *link; if (!dev_has_sync_state(dev)) return; if (dev->state_synced) return; list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; } /* * Set the flag here to avoid adding the same device to a list more * than once. This can happen if new consumers get added to the device * and probed before the list is flushed. */ dev->state_synced = true; if (WARN_ON(!list_empty(&dev->links.defer_hook))) return; get_device(dev); list_add_tail(&dev->links.defer_hook, list); } /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This * function is used in conjunction with __device_links_queue_sync_state(). The * @dont_lock_dev parameter is useful when this function is called from a * context where a device lock is already held. */ static void device_links_flush_sync_list(struct list_head *list, struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_hook) { list_del_init(&dev->links.defer_hook); if (dev != dont_lock_dev) device_lock(dev); if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); if (dev != dont_lock_dev) device_unlock(dev); put_device(dev); } } void device_links_supplier_sync_state_pause(void) { device_links_write_lock(); defer_sync_state_count++; device_links_write_unlock(); } void device_links_supplier_sync_state_resume(void) { struct device *dev, *tmp; LIST_HEAD(sync_list); device_links_write_lock(); if (!defer_sync_state_count) { WARN(true, "Unmatched sync_state pause/resume!"); goto out; } defer_sync_state_count--; if (defer_sync_state_count) goto out; list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_hook) { /* * Delete from deferred_sync list before queuing it to * sync_list because defer_hook is used for both lists. */ list_del_init(&dev->links.defer_hook); __device_links_queue_sync_state(dev, &sync_list); } out: device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) { device_links_supplier_sync_state_resume(); return 0; } late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { if (list_empty(&sup->links.defer_hook) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_hook, &deferred_sync); } static void device_link_drop_managed(struct device_link *link) { link->flags &= ~DL_FLAG_MANAGED; WRITE_ONCE(link->status, DL_STATE_NONE); kref_put(&link->kref, __device_link_del); } static ssize_t waiting_for_supplier_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); mutex_lock(&wfs_lock); val = !list_empty(&dev->links.needs_suppliers) && dev->links.need_for_probe; mutex_unlock(&wfs_lock); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static DEVICE_ATTR_RO(waiting_for_supplier); /** * device_links_driver_bound - Update device links after probing its driver. * @dev: Device to update the links for. * * The probe has been successful, so update links from this device to any * consumers by changing their status to "available". * * Also change the status of @dev's links to suppliers to "active". * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_bound(struct device *dev) { struct device_link *link, *ln; LIST_HEAD(sync_list); /* * If a device probes successfully, it's expected to have created all * the device links it needs to or make new device links as it needs * them. So, it no longer needs to wait on any suppliers. */ mutex_lock(&wfs_lock); list_del_init(&dev->links.needs_suppliers); mutex_unlock(&wfs_lock); device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * Links created during consumer probe may be in the "consumer * probe" state to start with if the supplier is still probing * when they are created and they may become "active" if the * consumer probe returns first. Skip them here. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) continue; WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) driver_deferred_probe_add(link->consumer); } if (defer_sync_state_count) __device_links_supplier_defer_sync(dev); else __device_links_queue_sync_state(dev, &sync_list); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { struct device *supplier; if (!(link->flags & DL_FLAG_MANAGED)) continue; supplier = link->supplier; if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's * save to drop the managed link completely. */ device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); } /* * This needs to be done even for the deleted * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last * device link that was preventing the supplier from getting a * sync_state() call. */ if (defer_sync_state_count) __device_links_supplier_defer_sync(supplier); else __device_links_queue_sync_state(supplier, &sync_list); } dev->links.status = DL_DEV_DRIVER_BOUND; device_links_write_unlock(); device_links_flush_sync_list(&sync_list, dev); } /** * __device_links_no_driver - Update links of a device without a driver. * @dev: Device without a drvier. * * Delete all non-persistent links from this device to any suppliers. * * Persistent links stay around, but their status is changed to "available", * unless they already are in the "supplier unbind in progress" state in which * case they need not be updated. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ static void __device_links_no_driver(struct device *dev) { struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { device_link_drop_managed(link); continue; } if (link->status != DL_STATE_CONSUMER_PROBE && link->status != DL_STATE_ACTIVE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } dev->links.status = DL_DEV_NO_DRIVER; } /** * device_links_no_driver - Update links after failing driver probe. * @dev: Device whose driver has just failed to probe. * * Clean up leftover links to consumers for @dev and invoke * %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_no_driver(struct device *dev) { struct device_link *link; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * The probe has failed, so if the status of the link is * "consumer probe" or "active", it must have been added by * a probing consumer while this device was still probing. * Change its state to "dormant", as it represents a valid * relationship, but it is not functionally meaningful. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_DORMANT); } __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_driver_cleanup - Update links after driver removal. * @dev: Device whose driver has just gone away. * * Update links to consumers for @dev by changing their status to "dormant" and * invoke %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_cleanup(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* * autoremove the links between this @dev and its consumer * devices that are not active, i.e. where the link state * has moved to DL_STATE_SUPPLIER_UNBIND. */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); } list_del_init(&dev->links.defer_hook); __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_busy - Check if there are any busy links to consumers. * @dev: Device to check. * * Check each consumer of the device and return 'true' if its link's status * is one of "consumer probe" or "active" (meaning that the given consumer is * probing right now or its driver is present). Otherwise, change the link * state to "supplier unbind" to prevent the consumer from being probed * successfully going forward. * * Return 'false' if there are no probing or active consumers. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ bool device_links_busy(struct device *dev) { struct device_link *link; bool ret = false; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) { ret = true; break; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); } dev->links.status = DL_DEV_UNBINDING; device_links_write_unlock(); return ret; } /** * device_links_unbind_consumers - Force unbind consumers of the given device. * @dev: Device to unbind the consumers of. * * Walk the list of links to consumers for @dev and if any of them is in the * "consumer probe" state, wait for all device probes in progress to complete * and start over. * * If that's not the case, change the status of the link to "supplier unbind" * and check if the link was in the "active" state. If so, force the consumer * driver to unbind and start over (the consumer will not re-probe as we have * changed the state of the link already). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_unbind_consumers(struct device *dev) { struct device_link *link; start: device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; if (!(link->flags & DL_FLAG_MANAGED) || link->flags & DL_FLAG_SYNC_STATE_ONLY) continue; status = link->status; if (status == DL_STATE_CONSUMER_PROBE) { device_links_write_unlock(); wait_for_device_probe(); goto start; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); if (status == DL_STATE_ACTIVE) { struct device *consumer = link->consumer; get_device(consumer); device_links_write_unlock(); device_release_driver_internal(consumer, NULL, consumer->parent); put_device(consumer); goto start; } } device_links_write_unlock(); } /** * device_links_purge - Delete existing links to other devices. * @dev: Target device. */ static void device_links_purge(struct device *dev) { struct device_link *link, *ln; if (dev->class == &devlink_class) return; mutex_lock(&wfs_lock); list_del_init(&dev->links.needs_suppliers); mutex_unlock(&wfs_lock); /* * Delete all of the remaining links from this device to any other * devices (either consumers or suppliers). */ device_links_write_lock(); list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { WARN_ON(link->status == DL_STATE_ACTIVE); __device_link_del(&link->kref); } list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { WARN_ON(link->status != DL_STATE_DORMANT && link->status != DL_STATE_NONE); __device_link_del(&link->kref); } device_links_write_unlock(); } static u32 fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; static int __init fw_devlink_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "off") == 0) { fw_devlink_flags = 0; } else if (strcmp(arg, "permissive") == 0) { fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; } else if (strcmp(arg, "on") == 0) { fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER; } else if (strcmp(arg, "rpm") == 0) { fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER | DL_FLAG_PM_RUNTIME; } return 0; } early_param("fw_devlink", fw_devlink_setup); u32 fw_devlink_get_flags(void) { return fw_devlink_flags; } static bool fw_devlink_is_permissive(void) { return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY; } static void fw_devlink_link_device(struct device *dev) { int fw_ret; if (!fw_devlink_flags) return; mutex_lock(&defer_fw_devlink_lock); if (!defer_fw_devlink_count) device_link_add_missing_supplier_links(); /* * The device's fwnode not having add_links() doesn't affect if other * consumers can find this device as a supplier. So, this check is * intentionally placed after device_link_add_missing_supplier_links(). */ if (!fwnode_has_op(dev->fwnode, add_links)) goto out; /* * If fw_devlink is being deferred, assume all devices have mandatory * suppliers they need to link to later. Then, when the fw_devlink is * resumed, all these devices will get a chance to try and link to any * suppliers they have. */ if (!defer_fw_devlink_count) { fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev); if (fw_ret == -ENODEV && fw_devlink_is_permissive()) fw_ret = -EAGAIN; } else { fw_ret = -ENODEV; /* * defer_hook is not used to add device to deferred_sync list * until device is bound. Since deferred fw devlink also blocks * probing, same list hook can be used for deferred_fw_devlink. */ list_add_tail(&dev->links.defer_hook, &deferred_fw_devlink); } if (fw_ret == -ENODEV) device_link_wait_for_mandatory_supplier(dev); else if (fw_ret) device_link_wait_for_optional_supplier(dev); out: mutex_unlock(&defer_fw_devlink_lock); } /** * fw_devlink_pause - Pause parsing of fwnode to create device links * * Calling this function defers any fwnode parsing to create device links until * fw_devlink_resume() is called. Both these functions are ref counted and the * caller needs to match the calls. * * While fw_devlink is paused: * - Any device that is added won't have its fwnode parsed to create device * links. * - The probe of the device will also be deferred during this period. * - Any devices that were already added, but waiting for suppliers won't be * able to link to newly added devices. * * Once fw_devlink_resume(): * - All the fwnodes that was not parsed will be parsed. * - All the devices that were deferred probing will be reattempted if they * aren't waiting for any more suppliers. * * This pair of functions, is mainly meant to optimize the parsing of fwnodes * when a lot of devices that need to link to each other are added in a short * interval of time. For example, adding all the top level devices in a system. * * For example, if N devices are added and: * - All the consumers are added before their suppliers * - All the suppliers of the N devices are part of the N devices * * Then: * * - With the use of fw_devlink_pause() and fw_devlink_resume(), each device * will only need one parsing of its fwnode because it is guaranteed to find * all the supplier devices already registered and ready to link to. It won't * have to do another pass later to find one or more suppliers it couldn't * find in the first parse of the fwnode. So, we'll only need O(N) fwnode * parses. * * - Without the use of fw_devlink_pause() and fw_devlink_resume(), we would * end up doing O(N^2) parses of fwnodes because every device that's added is * guaranteed to trigger a parse of the fwnode of every device added before * it. This O(N^2) parse is made worse by the fact that when a fwnode of a * device is parsed, all it descendant devices might need to have their * fwnodes parsed too (even if the devices themselves aren't added). */ void fw_devlink_pause(void) { mutex_lock(&defer_fw_devlink_lock); defer_fw_devlink_count++; mutex_unlock(&defer_fw_devlink_lock); } /** fw_devlink_resume - Resume parsing of fwnode to create device links * * This function is used in conjunction with fw_devlink_pause() and is ref * counted. See documentation for fw_devlink_pause() for more details. */ void fw_devlink_resume(void) { struct device *dev, *tmp; LIST_HEAD(probe_list); mutex_lock(&defer_fw_devlink_lock); if (!defer_fw_devlink_count) { WARN(true, "Unmatched fw_devlink pause/resume!"); goto out; } defer_fw_devlink_count--; if (defer_fw_devlink_count) goto out; device_link_add_missing_supplier_links(); list_splice_tail_init(&deferred_fw_devlink, &probe_list); out: mutex_unlock(&defer_fw_devlink_lock); /* * bus_probe_device() can cause new devices to get added and they'll * try to grab defer_fw_devlink_lock. So, this needs to be done outside * the defer_fw_devlink_lock. */ list_for_each_entry_safe(dev, tmp, &probe_list, links.defer_hook) { list_del_init(&dev->links.defer_hook); bus_probe_device(dev); } } /* Device links support end. */ int (*platform_notify)(struct device *dev) = NULL; int (*platform_notify_remove)(struct device *dev) = NULL; static struct kobject *dev_kobj; struct kobject *sysfs_dev_char_kobj; struct kobject *sysfs_dev_block_kobj; static DEFINE_MUTEX(device_hotplug_lock); void lock_device_hotplug(void) { mutex_lock(&device_hotplug_lock); } void unlock_device_hotplug(void) { mutex_unlock(&device_hotplug_lock); } int lock_device_hotplug_sysfs(void) { if (mutex_trylock(&device_hotplug_lock)) return 0; /* Avoid busy looping (5 ms of sleep should do). */ msleep(5); return restart_syscall(); } #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { return !(dev->type == &part_type); } #else static inline int device_is_not_partition(struct device *dev) { return 1; } #endif static int device_platform_notify(struct device *dev, enum kobject_action action) { int ret; ret = acpi_platform_notify(dev, action); if (ret) return ret; ret = software_node_notify(dev, action); if (ret) return ret; if (platform_notify && action == KOBJ_ADD) platform_notify(dev); else if (platform_notify_remove && action == KOBJ_REMOVE) platform_notify_remove(dev); return 0; } /** * dev_driver_string - Return a device's driver name, if at all possible * @dev: struct device to get the name of * * Will return the device's driver's name if it is bound to a device. If * the device is not bound to a driver, it will return the name of the bus * it is attached to. If it is not attached to a bus either, an empty * string will be returned. */ const char *dev_driver_string(const struct device *dev) { struct device_driver *drv; /* dev->driver can change to NULL underneath us because of unbinding, * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); return drv ? drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->show) ret = dev_attr->show(dev, dev_attr, buf); if (ret >= (ssize_t)PAGE_SIZE) { printk("dev_attr_show: %pS returned bad count\n", dev_attr->show); } return ret; } static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->store) ret = dev_attr->store(dev, dev_attr, buf, count); return ret; } static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; #define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; unsigned long new; ret = kstrtoul(buf, 0, &new); if (ret) return ret; *(unsigned long *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_ulong); ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; long new; ret = kstrtol(buf, 0, &new); if (ret) return ret; if (new > INT_MAX || new < INT_MIN) return -EINVAL; *(int *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_int); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); if (strtobool(buf, ea->var) < 0) return -EINVAL; return size; } EXPORT_SYMBOL_GPL(device_store_bool); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); /** * device_release - free device structure. * @kobj: device's kobject. * * This is called once the reference count for the object * reaches 0. We forward the call to the device's release * method, which should handle actually freeing the structure. */ static void device_release(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); struct device_private *p = dev->p; /* * Some platform devices are driven without driver attached * and managed resources may have been acquired. Make sure * all resources are released. * * Drivers still can add resources into device after device * is deleted but alive, so release devres here to avoid * possible memory leak. */ devres_release_all(dev); kfree(dev->dma_range_map); if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) dev->type->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", dev_name(dev)); kfree(p); } static const void *device_namespace(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; if (dev->class && dev->class->ns_type) ns = dev->class->namespace(dev); return ns; } static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) { struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); } static struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, .namespace = device_namespace, .get_ownership = device_get_ownership, }; static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) return 1; } return 0; } static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; if (dev->class) return dev->class->name; return NULL; } static int dev_uevent(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env) { struct device *dev = kobj_to_dev(kobj); int retval = 0; /* add device node properties if present */ if (MAJOR(dev->devt)) { const char *tmp; const char *name; umode_t mode = 0; kuid_t uid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); if (name) { add_uevent_var(env, "DEVNAME=%s", name); if (mode) add_uevent_var(env, "DEVMODE=%#o", mode & 0777); if (!uid_eq(uid, GLOBAL_ROOT_UID)) add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); if (!gid_eq(gid, GLOBAL_ROOT_GID)) add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); kfree(tmp); } } if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); if (dev->driver) add_uevent_var(env, "DRIVER=%s", dev->driver->name); /* Add common DT information about the device */ of_device_uevent(dev, env); /* have the bus specific function add its stuff */ if (dev->bus && dev->bus->uevent) { retval = dev->bus->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: bus uevent() returned %d\n", dev_name(dev), __func__, retval); } /* have the class specific function add its stuff */ if (dev->class && dev->class->dev_uevent) { retval = dev->class->dev_uevent(dev, env); if (retval) pr_debug("device: '%s': %s: class uevent() " "returned %d\n", dev_name(dev), __func__, retval); } /* have the device type specific function add its stuff */ if (dev->type && dev->type->uevent) { retval = dev->type->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: dev_type uevent() " "returned %d\n", dev_name(dev), __func__, retval); } return retval; } static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, }; static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *top_kobj; struct kset *kset; struct kobj_uevent_env *env = NULL; int i; int len = 0; int retval; /* search the kset, the device belongs to */ top_kobj = &dev->kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) goto out; kset = top_kobj->kset; if (!kset->uevent_ops || !kset->uevent_ops->uevent) goto out; /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) if (!kset->uevent_ops->filter(kset, &dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); if (retval) goto out; /* copy keys to file */ for (i = 0; i < env->envp_idx; i++) len += sysfs_emit_at(buf, len, "%s\n", env->envp[i]); out: kfree(env); return len; } static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&dev->kobj, buf, count); if (rc) { dev_err(dev, "uevent: failed to send synthetic uevent\n"); return rc; } return count; } static DEVICE_ATTR_RW(uevent); static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); val = !dev->offline; device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool val; int ret; ret = strtobool(buf, &val); if (ret < 0) return ret; ret = lock_device_hotplug_sysfs(); if (ret) return ret; ret = val ? device_online(dev) : device_offline(dev); unlock_device_hotplug(); return ret < 0 ? ret : count; } static DEVICE_ATTR_RW(online); int device_add_groups(struct device *dev, const struct attribute_group **groups) { return sysfs_create_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_add_groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups) { sysfs_remove_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_remove_groups); union device_attr_group_devres { const struct attribute_group *group; const struct attribute_group **groups; }; static int devm_attr_group_match(struct device *dev, void *res, void *data) { return ((union device_attr_group_devres *)res)->group == data; } static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group *group = devres->group; dev_dbg(dev, "%s: removing group %p\n", __func__, group); sysfs_remove_group(&dev->kobj, group); } static void devm_attr_groups_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group **groups = devres->groups; dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); sysfs_remove_groups(&dev->kobj, groups); } /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. */ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_group_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_group(&dev->kobj, grp); if (error) { devres_free(devres); return error; } devres->group = grp; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_group); /** * devm_device_remove_group: remove a managed group from a device * @dev: device to remove the group from * @grp: group to remove * * This function removes a group of attributes from a device. The attributes * previously have to have been created for this group, otherwise it will fail. */ void devm_device_remove_group(struct device *dev, const struct attribute_group *grp) { WARN_ON(devres_release(dev, devm_attr_group_remove, devm_attr_group_match, /* cast away const */ (void *)grp)); } EXPORT_SYMBOL_GPL(devm_device_remove_group); /** * devm_device_add_groups - create a bunch of managed attribute groups * @dev: The device to create the group for * @groups: The attribute groups to create, NULL terminated * * This function creates a bunch of managed attribute groups. If an error * occurs when creating a group, all previously created groups will be * removed, unwinding everything back to the original state when this * function was called. It will explicitly warn and error if any of the * attribute files being created already exist. * * Returns 0 on success or error code from sysfs_create_group on failure. */ int devm_device_add_groups(struct device *dev, const struct attribute_group **groups) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_groups_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_groups(&dev->kobj, groups); if (error) { devres_free(devres); return error; } devres->groups = groups; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_groups); /** * devm_device_remove_groups - remove a list of managed groups * * @dev: The device for the groups to be removed from * @groups: NULL terminated list of groups to be removed * * If groups is not NULL, remove the specified groups from the device. */ void devm_device_remove_groups(struct device *dev, const struct attribute_group **groups) { WARN_ON(devres_release(dev, devm_attr_groups_remove, devm_attr_group_match, /* cast away const */ (void *)groups)); } EXPORT_SYMBOL_GPL(devm_device_remove_groups); static int device_add_attrs(struct device *dev) { struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { error = device_add_groups(dev, class->dev_groups); if (error) return error; } if (type) { error = device_add_groups(dev, type->groups); if (error) goto err_remove_class_groups; } error = device_add_groups(dev, dev->groups); if (error) goto err_remove_type_groups; if (device_supports_offline(dev) && !dev->offline_disabled) { error = device_create_file(dev, &dev_attr_online); if (error) goto err_remove_dev_groups; } if (fw_devlink_flags && !fw_devlink_is_permissive()) { error = device_create_file(dev, &dev_attr_waiting_for_supplier); if (error) goto err_remove_dev_online; } return 0; err_remove_dev_online: device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: if (type) device_remove_groups(dev, type->groups); err_remove_class_groups: if (class) device_remove_groups(dev, class->dev_groups); return error; } static void device_remove_attrs(struct device *dev) { struct class *class = dev->class; const struct device_type *type = dev->type; device_remove_file(dev, &dev_attr_waiting_for_supplier); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); if (type) device_remove_groups(dev, type->groups); if (class) device_remove_groups(dev, class->dev_groups); } static ssize_t dev_show(struct device *dev, struct device_attribute *attr, char *buf) { return print_dev_t(buf, dev->devt); } static DEVICE_ATTR_RO(dev); /* /sys/devices/ */ struct kset *devices_kset; /** * devices_kset_move_before - Move device in the devices_kset's list. * @deva: Device to move. * @devb: Device @deva should come before. */ static void devices_kset_move_before(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s before %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move_tail(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_after - Move device in the devices_kset's list. * @deva: Device to move * @devb: Device @deva should come after. */ static void devices_kset_move_after(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s after %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_last - move the device to the end of devices_kset's list. * @dev: device to move */ void devices_kset_move_last(struct device *dev) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); spin_lock(&devices_kset->list_lock); list_move_tail(&dev->kobj.entry, &devices_kset->list); spin_unlock(&devices_kset->list_lock); } /** * device_create_file - create sysfs attribute file for device. * @dev: device. * @attr: device attribute descriptor. */ int device_create_file(struct device *dev, const struct device_attribute *attr) { int error = 0; if (dev) { WARN(((attr->attr.mode & S_IWUGO) && !attr->store), "Attribute %s: write permission without 'store'\n", attr->attr.name); WARN(((attr->attr.mode & S_IRUGO) && !attr->show), "Attribute %s: read permission without 'show'\n", attr->attr.name); error = sysfs_create_file(&dev->kobj, &attr->attr); } return error; } EXPORT_SYMBOL_GPL(device_create_file); /** * device_remove_file - remove sysfs attribute file. * @dev: device. * @attr: device attribute descriptor. */ void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(device_remove_file); /** * device_remove_file_self - remove sysfs attribute file from its own method. * @dev: device. * @attr: device attribute descriptor. * * See kernfs_remove_self() for details. */ bool device_remove_file_self(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_remove_file_self(&dev->kobj, &attr->attr); else return false; } EXPORT_SYMBOL_GPL(device_remove_file_self); /** * device_create_bin_file - create sysfs binary attribute file for device. * @dev: device. * @attr: device binary attribute descriptor. */ int device_create_bin_file(struct device *dev, const struct bin_attribute *attr) { int error = -EINVAL; if (dev) error = sysfs_create_bin_file(&dev->kobj, attr); return error; } EXPORT_SYMBOL_GPL(device_create_bin_file); /** * device_remove_bin_file - remove sysfs binary attribute file * @dev: device. * @attr: device binary attribute descriptor. */ void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); } EXPORT_SYMBOL_GPL(device_remove_bin_file); static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; get_device(dev); } static void klist_children_put(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; put_device(dev); } /** * device_initialize - init device structure. * @dev: device. * * This prepares the device for use by other layers by initializing * its fields. * It is the first half of device_register(), if called by * that function, though it can also be called separately, so one * may use @dev's fields. In particular, get_device()/put_device() * may be used for reference counting of @dev after calling this * function. * * All fields in @dev must be initialized by the caller to 0, except * for those explicitly set to some other value. The simplest * approach is to use kzalloc() to allocate the structure containing * @dev. * * NOTE: Use put_device() to give up your reference instead of freeing * @dev directly once you have called this function. */ void device_initialize(struct device *dev) { dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); #ifdef CONFIG_PROVE_LOCKING mutex_init(&dev->lockdep_mutex); #endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); set_dev_node(dev, -1); #ifdef CONFIG_GENERIC_MSI_IRQ raw_spin_lock_init(&dev->msi_lock); INIT_LIST_HEAD(&dev->msi_list); #endif INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.needs_suppliers); INIT_LIST_HEAD(&dev->links.defer_hook); dev->links.status = DL_DEV_NO_DRIVER; } EXPORT_SYMBOL_GPL(device_initialize); struct kobject *virtual_device_parent(struct device *dev) { static struct kobject *virtual_dir = NULL; if (!virtual_dir) virtual_dir = kobject_create_and_add("virtual", &devices_kset->kobj); return virtual_dir; } struct class_dir { struct kobject kobj; struct class *class; }; #define to_class_dir(obj) container_of(obj, struct class_dir, kobj) static void class_dir_release(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); kfree(dir); } static const struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } static struct kobj_type class_dir_ktype = { .release = class_dir_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = class_dir_child_ns_type }; static struct kobject * class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) { struct class_dir *dir; int retval; dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return ERR_PTR(-ENOMEM); dir->class = class; kobject_init(&dir->kobj, &class_dir_ktype); dir->kobj.kset = &class->p->glue_dirs; retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); if (retval < 0) { kobject_put(&dir->kobj); return ERR_PTR(retval); } return &dir->kobj; } static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { if (dev->class) { struct kobject *kobj = NULL; struct kobject *parent_kobj; struct kobject *k; #ifdef CONFIG_BLOCK /* block disks show up in /sys/block */ if (sysfs_deprecated && dev->class == &block_class) { if (parent && parent->class == &block_class) return &parent->kobj; return &block_class.p->subsys.kobj; } #endif /* * If we have no parent, we live in "virtual". * Class-devices with a non class-device as parent, live * in a "glue" directory to prevent namespace collisions. */ if (parent == NULL) parent_kobj = virtual_device_parent(dev); else if (parent->class && !dev->class->ns_type) return &parent->kobj; else parent_kobj = &parent->kobj; mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ spin_lock(&dev->class->p->glue_dirs.list_lock); list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } spin_unlock(&dev->class->p->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); return kobj; } /* or create a new class-directory at the parent device */ k = class_dir_create_and_add(dev->class, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); return k; } /* subsystems can specify a default root directory for their devices */ if (!parent && dev->bus && dev->bus->dev_root) return &dev->bus->dev_root->kobj; if (parent) return &parent->kobj; return NULL; } static inline bool live_in_glue_dir(struct kobject *kobj, struct device *dev) { if (!kobj || !dev->class || kobj->kset != &dev->class->p->glue_dirs) return false; return true; } static inline struct kobject *get_glue_dir(struct device *dev) { return dev->kobj.parent; } /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the * global lock */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { unsigned int ref; /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); /** * There is a race condition between removing glue directory * and adding a new device under the glue directory. * * CPU1: CPU2: * * device_add() * get_device_parent() * class_dir_create_and_add() * kobject_add_internal() * create_dir() // create glue_dir * * device_add() * get_device_parent() * kobject_get() // get glue_dir * * device_del() * cleanup_glue_dir() * kobject_del(glue_dir) * * kobject_add() * kobject_add_internal() * create_dir() // in glue_dir * sysfs_create_dir_ns() * kernfs_create_dir_ns(sd) * * sysfs_remove_dir() // glue_dir->sd=NULL * sysfs_put() // free glue_dir->sd * * // sd is freed * kernfs_new_node(sd) * kernfs_get(glue_dir) * kernfs_add_one() * kernfs_put() * * Before CPU1 remove last child device under glue dir, if CPU2 add * a new device under glue dir, the glue_dir kobject reference count * will be increase to 2 in kobject_get(k). And CPU2 has been called * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() * and sysfs_put(). This result in glue_dir->sd is freed. * * Then the CPU2 will see a stale "empty" but still potentially used * glue dir around in kernfs_new_node(). * * In order to avoid this happening, we also should make sure that * kernfs_node for glue_dir is released in CPU1 only when refcount * for glue_dir kobj is 1. */ ref = kref_read(&glue_dir->kref); if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); int error; if (of_node) { error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); if (error) dev_warn(dev, "Error %d creating of_node link\n",error); /* An error here doesn't warrant bringing down the device */ } if (!dev->class) return 0; error = sysfs_create_link(&dev->kobj, &dev->class->p->subsys.kobj, "subsystem"); if (error) goto out_devnode; if (dev->parent && device_is_not_partition(dev)) { error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); if (error) goto out_subsys; } #ifdef CONFIG_BLOCK /* /sys/block has directories and does not need symlinks */ if (sysfs_deprecated && dev->class == &block_class) return 0; #endif /* link in the class directory pointing to the device */ error = sysfs_create_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; return 0; out_device: sysfs_remove_link(&dev->kobj, "device"); out_subsys: sysfs_remove_link(&dev->kobj, "subsystem"); out_devnode: sysfs_remove_link(&dev->kobj, "of_node"); return error; } static void device_remove_class_symlinks(struct device *dev) { if (dev_of_node(dev)) sysfs_remove_link(&dev->kobj, "of_node"); if (!dev->class) return; if (dev->parent && device_is_not_partition(dev)) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(&dev->kobj, "subsystem"); #ifdef CONFIG_BLOCK if (sysfs_deprecated && dev->class == &block_class) return; #endif sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); } /** * dev_set_name - set a device name * @dev: device * @fmt: format string for the device's name */ int dev_set_name(struct device *dev, const char *fmt, ...) { va_list vargs; int err; va_start(vargs, fmt); err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_set_name); /** * device_to_dev_kobj - select a /sys/dev/ directory for the device * @dev: device * * By default we select char/ for new entries. Setting class->dev_obj * to NULL prevents an entry from being created. class->dev_kobj must * be set (or cleared) before any devices are registered to the class * otherwise device_create_sys_dev_entry() and * device_remove_sys_dev_entry() will disagree about the presence of * the link. */ static struct kobject *device_to_dev_kobj(struct device *dev) { struct kobject *kobj; if (dev->class) kobj = dev->class->dev_kobj; else kobj = sysfs_dev_char_kobj; return kobj; } static int device_create_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); int error = 0; char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); error = sysfs_create_link(kobj, &dev->kobj, devt_str); } return error; } static void device_remove_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); sysfs_remove_link(kobj, devt_str); } } static int device_private_init(struct device *dev) { dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); if (!dev->p) return -ENOMEM; dev->p->device = dev; klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); return 0; } /** * device_add - add device to device hierarchy. * @dev: device. * * This is part 2 of device_register(), though may be called * separately _iff_ device_initialize() has been called separately. * * This adds @dev to the kobject hierarchy via kobject_add(), adds it * to the global and sibling lists for the device, then * adds it to the other relevant subsystems of the driver model. * * Do not call this routine or device_register() more than once for * any device structure. The driver model core is not designed to work * with devices that get unregistered and then spring back to life. * (Among other things, it's very hard to guarantee that all references * to the previous incarnation of @dev have been dropped.) Allocate * and register a fresh new struct device instead. * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up your * reference instead. * * Rule of thumb is: if device_add() succeeds, you should call * device_del() when you want to get rid of it. If device_add() has * *not* succeeded, use *only* put_device() to drop the reference * count. */ int device_add(struct device *dev) { struct device *parent; struct kobject *kobj; struct class_interface *class_intf; int error = -EINVAL; struct kobject *glue_dir = NULL; dev = get_device(dev); if (!dev) goto done; if (!dev->p) { error = device_private_init(dev); if (error) goto done; } /* * for statically allocated devices, which should all be converted * some day, we need to initialize the name. We prevent reading back * the name, and force the use of dev_name() */ if (dev->init_name) { dev_set_name(dev, "%s", dev->init_name); dev->init_name = NULL; } /* subsystems can specify simple device enumeration */ if (!dev_name(dev) && dev->bus && dev->bus->dev_name) dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); if (!dev_name(dev)) { error = -EINVAL; goto name_error; } pr_debug("device: '%s': %s\n", dev_name(dev), __func__); parent = get_device(dev->parent); kobj = get_device_parent(dev, parent); if (IS_ERR(kobj)) { error = PTR_ERR(kobj); goto parent_error; } if (kobj) dev->kobj.parent = kobj; /* use parent numa_node */ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) set_dev_node(dev, dev_to_node(parent)); /* first, register with generic layer. */ /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); if (error) { glue_dir = get_glue_dir(dev); goto Error; } /* notify platform of device entry */ error = device_platform_notify(dev, KOBJ_ADD); if (error) goto platform_error; error = device_create_file(dev, &dev_attr_uevent); if (error) goto attrError; error = device_add_class_symlinks(dev); if (error) goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; error = bus_add_device(dev); if (error) goto BusError; error = dpm_sysfs_add(dev); if (error) goto DPMError; device_pm_add(dev); if (MAJOR(dev->devt)) { error = device_create_file(dev, &dev_attr_dev); if (error) goto DevAttrError; error = device_create_sys_dev_entry(dev); if (error) goto SysEntryError; devtmpfs_create_node(dev); } /* Notify clients of device addition. This call must come * after dpm_sysfs_add() and before kobject_uevent(). */ if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_ADD_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_ADD); /* * Check if any of the other devices (consumers) have been waiting for * this device (supplier) to be added so that they can create a device * link to it. * * This needs to happen after device_pm_add() because device_link_add() * requires the supplier be registered before it's called. * * But this also needs to happen before bus_probe_device() to make sure * waiting consumers can link to it before the driver is bound to the * device and the driver sync_state callback is called for this device. */ if (dev->fwnode && !dev->fwnode->dev) { dev->fwnode->dev = dev; fw_devlink_link_device(dev); } bus_probe_device(dev); if (parent) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); if (dev->class) { mutex_lock(&dev->class->p->mutex); /* tie the class to the device */ klist_add_tail(&dev->p->knode_class, &dev->class->p->klist_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, &dev->class->p->interfaces, node) if (class_intf->add_dev) class_intf->add_dev(dev, class_intf); mutex_unlock(&dev->class->p->mutex); } done: put_device(dev); return error; SysEntryError: if (MAJOR(dev->devt)) device_remove_file(dev, &dev_attr_dev); DevAttrError: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: bus_remove_device(dev); BusError: device_remove_attrs(dev); AttrsError: device_remove_class_symlinks(dev); SymlinkError: device_remove_file(dev, &dev_attr_uevent); attrError: device_platform_notify(dev, KOBJ_REMOVE); platform_error: kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); Error: cleanup_glue_dir(dev, glue_dir); parent_error: put_device(parent); name_error: kfree(dev->p); dev->p = NULL; goto done; } EXPORT_SYMBOL_GPL(device_add); /** * device_register - register a device with the system. * @dev: pointer to the device structure * * This happens in two clean steps - initialize the device * and add it to the system. The two steps can be called * separately, but this is the easiest and most common. * I.e. you should only call the two helpers separately if * have a clearly defined need to use and refcount the device * before it is added to the hierarchy. * * For more information, see the kerneldoc for device_initialize() * and device_add(). * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up the * reference initialized in this function instead. */ int device_register(struct device *dev) { device_initialize(dev); return device_add(dev); } EXPORT_SYMBOL_GPL(device_register); /** * get_device - increment reference count for device. * @dev: device. * * This simply forwards the call to kobject_get(), though * we do take care to provide for the case that we get a NULL * pointer passed in. */ struct device *get_device(struct device *dev) { return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; } EXPORT_SYMBOL_GPL(get_device); /** * put_device - decrement reference count. * @dev: device in question. */ void put_device(struct device *dev) { /* might_sleep(); */ if (dev) kobject_put(&dev->kobj); } EXPORT_SYMBOL_GPL(put_device); bool kill_device(struct device *dev) { /* * Require the device lock and set the "dead" flag to guarantee that * the update behavior is consistent with the other bitfields near * it and that we cannot have an asynchronous probe routine trying * to run while we are tearing out the bus/class/sysfs from * underneath the device. */ lockdep_assert_held(&dev->mutex); if (dev->p->dead) return false; dev->p->dead = true; return true; } EXPORT_SYMBOL_GPL(kill_device); /** * device_del - delete device from system. * @dev: device. * * This is the first part of the device unregistration * sequence. This removes the device from the lists we control * from here, has it removed from the other driver model * subsystems it was added to in device_add(), and removes it * from the kobject hierarchy. * * NOTE: this should be called manually _iff_ device_add() was * also called manually. */ void device_del(struct device *dev) { struct device *parent = dev->parent; struct kobject *glue_dir = NULL; struct class_interface *class_intf; unsigned int noio_flag; device_lock(dev); kill_device(dev); device_unlock(dev); if (dev->fwnode && dev->fwnode->dev == dev) dev->fwnode->dev = NULL; /* Notify clients of device removal. This call must come * before dpm_sysfs_remove(). */ noio_flag = memalloc_noio_save(); if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DEL_DEVICE, dev); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); if (MAJOR(dev->devt)) { devtmpfs_delete_node(dev); device_remove_sys_dev_entry(dev); device_remove_file(dev, &dev_attr_dev); } if (dev->class) { device_remove_class_symlinks(dev); mutex_lock(&dev->class->p->mutex); /* notify any interfaces that the device is now gone */ list_for_each_entry(class_intf, &dev->class->p->interfaces, node) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ klist_del(&dev->p->knode_class); mutex_unlock(&dev->class->p->mutex); } device_remove_file(dev, &dev_attr_uevent); device_remove_attrs(dev); bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); device_platform_notify(dev, KOBJ_REMOVE); device_remove_properties(dev); device_links_purge(dev); if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_REMOVED_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); cleanup_glue_dir(dev, glue_dir); memalloc_noio_restore(noio_flag); put_device(parent); } EXPORT_SYMBOL_GPL(device_del); /** * device_unregister - unregister device from system. * @dev: device going away. * * We do this in two parts, like we do device_register(). First, * we remove it from all the subsystems with device_del(), then * we decrement the reference count via put_device(). If that * is the final reference count, the device will be cleaned up * via device_release() above. Otherwise, the structure will * stick around until the final reference to the device is dropped. */ void device_unregister(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); device_del(dev); put_device(dev); } EXPORT_SYMBOL_GPL(device_unregister); static struct device *prev_device(struct klist_iter *i) { struct klist_node *n = klist_prev(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } /** * device_get_devnode - path of device node file * @dev: device * @mode: returned file access mode * @uid: returned file owner * @gid: returned file group * @tmp: possibly allocated string * * Return the relative path of a possible device node. * Non-default names may need to allocate a memory to compose * a name. This memory is returned in tmp and needs to be * freed by the caller. */ const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp) { char *s; *tmp = NULL; /* the device type may provide a specific name */ if (dev->type && dev->type->devnode) *tmp = dev->type->devnode(dev, mode, uid, gid); if (*tmp) return *tmp; /* the class may provide a specific name */ if (dev->class && dev->class->devnode) *tmp = dev->class->devnode(dev, mode); if (*tmp) return *tmp; /* return name without allocation, tmp == NULL */ if (strchr(dev_name(dev), '!') == NULL) return dev_name(dev); /* replace '!' in the name with '/' */ s = kstrdup(dev_name(dev), GFP_KERNEL); if (!s) return NULL; strreplace(s, '!', '/'); return *tmp = s; } /** * device_for_each_child - device child iterator. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while (!error && (child = next_device(&i))) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while ((child = prev_device(&i)) && !error) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child_reverse); /** * device_find_child - device iterator for locating a particular device. * @parent: parent struct device * @match: Callback function to check device * @data: Data to pass to match function * * This is similar to the device_for_each_child() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero and a reference to the * current device can be obtained, this function will return to the caller * and not iterate over any more devices. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child(struct device *parent, void *data, int (*match)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (match(child, data) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); /** * device_find_child_by_name - device iterator for locating a child device. * @parent: parent struct device * @name: name of the child device * * This is similar to the device_find_child() function above, but it * returns a reference to a device that has the name @name. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child_by_name(struct device *parent, const char *name) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (sysfs_streq(dev_name(child), name) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child_by_name); int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); if (!devices_kset) return -ENOMEM; dev_kobj = kobject_create_and_add("dev", NULL); if (!dev_kobj) goto dev_kobj_err; sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); if (!sysfs_dev_block_kobj) goto block_kobj_err; sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; return 0; char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: kobject_put(dev_kobj); dev_kobj_err: kset_unregister(devices_kset); return -ENOMEM; } static int device_check_offline(struct device *dev, void *not_used) { int ret; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; } /** * device_offline - Prepare the device for hot-removal. * @dev: Device to be put offline. * * Execute the device bus type's .offline() callback, if present, to prepare * the device for a subsequent hot-removal. If that succeeds, the device must * not be used until either it is removed or its bus type's .online() callback * is executed. * * Call under device_hotplug_lock. */ int device_offline(struct device *dev) { int ret; if (dev->offline_disabled) return -EPERM; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = 1; } else { ret = dev->bus->offline(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_OFFLINE); dev->offline = true; } } } device_unlock(dev); return ret; } /** * device_online - Put the device back online after successful device_offline(). * @dev: Device to be put back online. * * If device_offline() has been successfully executed for @dev, but the device * has not been removed subsequently, execute its bus type's .online() callback * to indicate that the device can be used again. * * Call under device_hotplug_lock. */ int device_online(struct device *dev) { int ret = 0; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = dev->bus->online(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_ONLINE); dev->offline = false; } } else { ret = 1; } } device_unlock(dev); return ret; } struct root_device { struct device dev; struct module *owner; }; static inline struct root_device *to_root_device(struct device *d) { return container_of(d, struct root_device, dev); } static void root_device_release(struct device *dev) { kfree(to_root_device(dev)); } /** * __root_device_register - allocate and register a root device * @name: root device name * @owner: owner module of the root device, usually THIS_MODULE * * This function allocates a root device and registers it * using device_register(). In order to free the returned * device, use root_device_unregister(). * * Root devices are dummy devices which allow other devices * to be grouped under /sys/devices. Use this function to * allocate a root device and then use it as the parent of * any device which should appear under /sys/devices/{name} * * The /sys/devices/{name} directory will also contain a * 'module' symlink which points to the @owner directory * in sysfs. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: You probably want to use root_device_register(). */ struct device *__root_device_register(const char *name, struct module *owner) { struct root_device *root; int err = -ENOMEM; root = kzalloc(sizeof(struct root_device), GFP_KERNEL); if (!root) return ERR_PTR(err); err = dev_set_name(&root->dev, "%s", name); if (err) { kfree(root); return ERR_PTR(err); } root->dev.release = root_device_release; err = device_register(&root->dev); if (err) { put_device(&root->dev); return ERR_PTR(err); } #ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ if (owner) { struct module_kobject *mk = &owner->mkobj; err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); if (err) { device_unregister(&root->dev); return ERR_PTR(err); } root->owner = owner; } #endif return &root->dev; } EXPORT_SYMBOL_GPL(__root_device_register); /** * root_device_unregister - unregister and free a root device * @dev: device going away * * This function unregisters and cleans up a device that was created by * root_device_register(). */ void root_device_unregister(struct device *dev) { struct root_device *root = to_root_device(dev); if (root->owner) sysfs_remove_link(&root->dev.kobj, "module"); device_unregister(dev); } EXPORT_SYMBOL_GPL(root_device_unregister); static void device_create_release(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); kfree(dev); } static __printf(6, 0) struct device * device_create_groups_vargs(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (class == NULL || IS_ERR(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * device_create - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: the struct class passed to this function must have previously * been created with a call to class_create(). */ struct device *device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create); /** * device_create_with_groups - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @groups: NULL-terminated list of attribute groups to be created * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * Additional attributes specified in the groups parameter will also * be created automatically. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: the struct class passed to this function must have previously * been created with a call to class_create(). */ struct device *device_create_with_groups(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create_with_groups); /** * device_destroy - removes a device that was created with device_create() * @class: pointer to the struct class that this device was registered with * @devt: the dev_t of the device that was previously registered * * This call unregisters and cleans up a device that was created with a * call to device_create(). */ void device_destroy(struct class *class, dev_t devt) { struct device *dev; dev = class_find_device_by_devt(class, devt); if (dev) { put_device(dev); device_unregister(dev); } } EXPORT_SYMBOL_GPL(device_destroy); /** * device_rename - renames a device * @dev: the pointer to the struct device to be renamed * @new_name: the new name of the device * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of device_rename * on the same device to ensure that new_name is valid and * won't conflict with other devices. * * Note: Don't call this function. Currently, the networking layer calls this * function, but that will change. The following text from Kay Sievers offers * some insight: * * Renaming devices is racy at many levels, symlinks and other stuff are not * replaced atomically, and you get a "move" uevent, but it's not easy to * connect the event to the old and new device. Device nodes are not renamed at * all, there isn't even support for that in the kernel now. * * In the meantime, during renaming, your target name might be taken by another * driver, creating conflicts. Or the old name is taken directly after you * renamed it -- then you get events for the same DEVPATH, before you even see * the "move" event. It's just a mess, and nothing new should ever rely on * kernel device renaming. Besides that, it's not even implemented now for * other things than (driver-core wise very simple) network devices. * * We are currently about to change network renaming in udev to completely * disallow renaming of devices in the same namespace as the kernel uses, * because we can't solve the problems properly, that arise with swapping names * of multiple interfaces without races. Means, renaming of eth[0-9]* will only * be allowed to some other name than eth[0-9]*, for the aforementioned * reasons. * * Make up a "real" name in the driver before you register anything, or add * some other attributes for userspace to find the device, or use udev to add * symlinks -- but never rename kernel devices later, it's a complete mess. We * don't even want to get into that and try to implement the missing pieces in * the core. We really have other pieces to fix in the driver core mess. :) */ int device_rename(struct device *dev, const char *new_name) { struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; dev = get_device(dev); if (!dev) return -EINVAL; dev_dbg(dev, "renaming to %s\n", new_name); old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!old_device_name) { error = -ENOMEM; goto out; } if (dev->class) { error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); if (error) goto out; } error = kobject_rename(kobj, new_name); if (error) goto out; out: put_device(dev); kfree(old_device_name); return error; } EXPORT_SYMBOL_GPL(device_rename); static int device_move_class_links(struct device *dev, struct device *old_parent, struct device *new_parent) { int error = 0; if (old_parent) sysfs_remove_link(&dev->kobj, "device"); if (new_parent) error = sysfs_create_link(&dev->kobj, &new_parent->kobj, "device"); return error; } /** * device_move - moves a device to a new parent * @dev: the pointer to the struct device to be moved * @new_parent: the new parent of the device (can be NULL) * @dpm_order: how to reorder the dpm_list */ int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order) { int error; struct device *old_parent; struct kobject *new_parent_kobj; dev = get_device(dev); if (!dev) return -EINVAL; device_pm_lock(); new_parent = get_device(new_parent); new_parent_kobj = get_device_parent(dev, new_parent); if (IS_ERR(new_parent_kobj)) { error = PTR_ERR(new_parent_kobj); put_device(new_parent); goto out; } pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), __func__, new_parent ? dev_name(new_parent) : "<NULL>"); error = kobject_move(&dev->kobj, new_parent_kobj); if (error) { cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } old_parent = dev->parent; dev->parent = new_parent; if (old_parent) klist_remove(&dev->p->knode_parent); if (new_parent) { klist_add_tail(&dev->p->knode_parent, &new_parent->p->klist_children); set_dev_node(dev, dev_to_node(new_parent)); } if (dev->class) { error = device_move_class_links(dev, old_parent, new_parent); if (error) { /* We ignore errors on cleanup since we're hosed anyway... */ device_move_class_links(dev, new_parent, old_parent); if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->p->knode_parent); dev->parent = old_parent; if (old_parent) { klist_add_tail(&dev->p->knode_parent, &old_parent->p->klist_children); set_dev_node(dev, dev_to_node(old_parent)); } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } } switch (dpm_order) { case DPM_ORDER_NONE: break; case DPM_ORDER_DEV_AFTER_PARENT: device_pm_move_after(dev, new_parent); devices_kset_move_after(dev, new_parent); break; case DPM_ORDER_PARENT_BEFORE_DEV: device_pm_move_before(new_parent, dev); devices_kset_move_before(new_parent, dev); break; case DPM_ORDER_DEV_LAST: device_pm_move_last(dev); devices_kset_move_last(dev); break; } put_device(old_parent); out: device_pm_unlock(); put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_move); static int device_attrs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { struct kobject *kobj = &dev->kobj; struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { /* * Change the device groups of the device class for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, class->dev_groups, kuid, kgid); if (error) return error; } if (type) { /* * Change the device groups of the device type for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, type->groups, kuid, kgid); if (error) return error; } /* Change the device groups of @dev to @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, dev->groups, kuid, kgid); if (error) return error; if (device_supports_offline(dev) && !dev->offline_disabled) { /* Change online device attributes of @dev to @kuid/@kgid. */ error = sysfs_file_change_owner(kobj, dev_attr_online.attr.name, kuid, kgid); if (error) return error; } return 0; } /** * device_change_owner - change the owner of an existing device. * @dev: device. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This changes the owner of @dev and its corresponding sysfs entries to * @kuid/@kgid. This function closely mirrors how @dev was added via driver * core. * * Returns 0 on success or error code on failure. */ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int error; struct kobject *kobj = &dev->kobj; dev = get_device(dev); if (!dev) return -EINVAL; /* * Change the kobject and the default attributes and groups of the * ktype associated with it to @kuid/@kgid. */ error = sysfs_change_owner(kobj, kuid, kgid); if (error) goto out; /* * Change the uevent file for @dev to the new owner. The uevent file * was created in a separate step when @dev got added and we mirror * that step here. */ error = sysfs_file_change_owner(kobj, dev_attr_uevent.attr.name, kuid, kgid); if (error) goto out; /* * Change the device groups, the device groups associated with the * device class, and the groups associated with the device type of @dev * to @kuid/@kgid. */ error = device_attrs_change_owner(dev, kuid, kgid); if (error) goto out; error = dpm_sysfs_change_owner(dev, kuid, kgid); if (error) goto out; #ifdef CONFIG_BLOCK if (sysfs_deprecated && dev->class == &block_class) goto out; #endif /* * Change the owner of the symlink located in the class directory of * the device class associated with @dev which points to the actual * directory entry for @dev to @kuid/@kgid. This ensures that the * symlink shows the same permissions as its target. */ error = sysfs_link_change_owner(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev), kuid, kgid); if (error) goto out; out: put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_change_owner); /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; wait_for_device_probe(); device_block_probing(); cpufreq_suspend(); spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. * Beware that device unplug events may also start pulling * devices offline, even as the system is shutting down. */ while (!list_empty(&devices_kset->list)) { dev = list_entry(devices_kset->list.prev, struct device, kobj.entry); /* * hold reference count of device's parent to * prevent it from being freed because parent's * lock is to be held */ parent = get_device(dev->parent); get_device(dev); /* * Make sure the device is off the kset list, in the * event that dev->*->shutdown() doesn't remove it. */ list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ if (parent) device_lock(parent); device_lock(dev); /* Don't allow any more runtime suspends */ pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); if (dev->class && dev->class->shutdown_pre) { if (initcall_debug) dev_info(dev, "shutdown_pre\n"); dev->class->shutdown_pre(dev); } if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); } else if (dev->driver && dev->driver->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->driver->shutdown(dev); } device_unlock(dev); if (parent) device_unlock(parent); put_device(dev); put_device(parent); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); } /* * Device logging functions */ #ifdef CONFIG_PRINTK static void set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) { const char *subsys; memset(dev_info, 0, sizeof(*dev_info)); if (dev->class) subsys = dev->class->name; else if (dev->bus) subsys = dev->bus->name; else return; strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); /* * Add device identifier DEVICE=: * b12:8 block dev_t * c127:3 char dev_t * n8 netdev ifindex * +sound:card0 subsystem:devname */ if (MAJOR(dev->devt)) { char c; if (strcmp(subsys, "block") == 0) c = 'b'; else c = 'c'; snprintf(dev_info->device, sizeof(dev_info->device), "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); } else if (strcmp(subsys, "net") == 0) { struct net_device *net = to_net_dev(dev); snprintf(dev_info->device, sizeof(dev_info->device), "n%u", net->ifindex); } else { snprintf(dev_info->device, sizeof(dev_info->device), "+%s:%s", subsys, dev_name(dev)); } } int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) { struct dev_printk_info dev_info; set_dev_info(dev, &dev_info); return vprintk_emit(0, level, &dev_info, fmt, args); } EXPORT_SYMBOL(dev_vprintk_emit); int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = dev_vprintk_emit(level, dev, fmt, args); va_end(args); return r; } EXPORT_SYMBOL(dev_printk_emit); static void __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) { if (dev) dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", dev_driver_string(dev), dev_name(dev), vaf); else printk("%s(NULL device *): %pV", level, vaf); } void dev_printk(const char *level, const struct device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; __dev_printk(level, dev, &vaf); va_end(args); } EXPORT_SYMBOL(dev_printk); #define define_dev_printk_level(func, kern_level) \ void func(const struct device *dev, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ __dev_printk(kern_level, dev, &vaf); \ \ va_end(args); \ } \ EXPORT_SYMBOL(func); define_dev_printk_level(_dev_emerg, KERN_EMERG); define_dev_printk_level(_dev_alert, KERN_ALERT); define_dev_printk_level(_dev_crit, KERN_CRIT); define_dev_printk_level(_dev_err, KERN_ERR); define_dev_printk_level(_dev_warn, KERN_WARNING); define_dev_printk_level(_dev_notice, KERN_NOTICE); define_dev_printk_level(_dev_info, KERN_INFO); #endif /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device * @err: error value to test * @fmt: printf-style format string * @...: arguments as specified in the format string * * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. * It replaces code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; * * with:: * * return dev_err_probe(dev, err, ...); * * Returns @err. * */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (err != -EPROBE_DEFER) { dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } else { device_set_deferred_probe_reason(dev, &vaf); dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } va_end(args); return err; } EXPORT_SYMBOL_GPL(dev_err_probe); static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); } /** * set_primary_fwnode - Change the primary firmware node of a given device. * @dev: Device to handle. * @fwnode: New primary firmware node of the device. * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { if (fwnode_is_primary(fn)) fn = fn->secondary; if (fn) { WARN_ON(fwnode->secondary); fwnode->secondary = fn; } dev->fwnode = fwnode; } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) fn->secondary = NULL; } else { dev->fwnode = NULL; } } } EXPORT_SYMBOL_GPL(set_primary_fwnode); /** * set_secondary_fwnode - Change the secondary firmware node of a given device. * @dev: Device to handle. * @fwnode: New secondary firmware node of the device. * * If a primary firmware node of the device is present, set its secondary * pointer to @fwnode. Otherwise, set the device's firmware node pointer to * @fwnode. */ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { if (fwnode) fwnode->secondary = ERR_PTR(-ENODEV); if (fwnode_is_primary(dev->fwnode)) dev->fwnode->secondary = fwnode; else dev->fwnode = fwnode; } EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused * * Takes another reference to the new device-tree node after first dropping * any reference held to the old node. */ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) { of_node_put(dev->of_node); dev->of_node = of_node_get(dev2->of_node); dev->of_node_reused = true; } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); int device_match_of_node(struct device *dev, const void *np) { return dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { return dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); int device_match_devt(struct device *dev, const void *pdevt) { return dev->devt == *(dev_t *)pdevt; } EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { return ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_any(struct device *dev, const void *unused) { return 1; } EXPORT_SYMBOL_GPL(device_match_any);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0 */ /* * sysctl.h: General linux system control interface * * Begun 24 March 1995, Stephen Tweedie * **************************************************************** **************************************************************** ** ** WARNING: ** The values in this file are exported to user space via ** the sysctl() binary interface. Do *NOT* change the ** numbering of any existing values here, and do not change ** any numbers within any one set of values. If you have to ** redefine an existing interface, use a new number for it. ** The kernel will then return -ENOTDIR to any application using ** the old binary interface. ** **************************************************************** **************************************************************** */ #ifndef _LINUX_SYSCTL_H #define _LINUX_SYSCTL_H #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/wait.h> #include <linux/rbtree.h> #include <linux/uidgid.h> #include <uapi/linux/sysctl.h> /* For the /proc/sys support */ struct completion; struct ctl_table; struct nsproxy; struct ctl_table_root; struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ #define SYSCTL_ZERO ((void *)&sysctl_vals[0]) #define SYSCTL_ONE ((void *)&sysctl_vals[1]) #define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) extern const int sysctl_vals[]; typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, size_t *, loff_t *); int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_do_static_key(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* * Register a set of sysctl names by calling register_sysctl_table * with an initialised array of struct ctl_table's. An entry with * NULL procname terminates the table. table->de will be * set up by the registration and need not be initialised in advance. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. * * The table's mode will be honoured for proc-fs access. * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * * The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * * There must be a proc_handler routine for any terminal nodes * mirrored under /proc/sys (non-terminals are handled by a built-in * directory handler). Several default handlers are available to * cover common cases. */ /* Support for userspace poll() to watch for changes */ struct ctl_table_poll { atomic_t event; wait_queue_head_t wait; }; static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) { return (void *)(unsigned long)atomic_read(&poll->event); } #define __CTL_TABLE_POLL_INITIALIZER(name) { \ .event = ATOMIC_INIT(0), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } #define DEFINE_CTL_TABLE_POLL(name) \ struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; umode_t mode; struct ctl_table *child; /* Deprecated */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; void *extra1; void *extra2; } __randomize_layout; struct ctl_node { struct rb_node node; struct ctl_table_header *header; }; /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header { union { struct { struct ctl_table *ctl_table; int used; int count; int nreg; }; struct rcu_head rcu; }; struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ }; struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; struct rb_root root; }; struct ctl_table_set { int (*is_seen)(struct ctl_table_set *); struct ctl_dir dir; }; struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root); void (*set_ownership)(struct ctl_table_header *head, struct ctl_table *table, kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; /* struct ctl_path describes where in the hierarchy a table is added */ struct ctl_path { const char *procname; }; #ifdef CONFIG_SYSCTL void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); void do_sysctl_args(void); extern int pwrsw_enabled; extern int unaligned_enabled; extern int unaligned_dump_stack; extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; extern struct ctl_table random_table[]; extern struct ctl_table firmware_config_table[]; extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { return NULL; } static inline struct ctl_table_header *register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { return NULL; } static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) { return NULL; } static inline void unregister_sysctl_table(struct ctl_table_header * table) { } static inline void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { } static inline void do_sysctl_args(void) { } #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */
1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178