1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 /* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions of structures and functions for quota formats using trie */ #ifndef _LINUX_DQBLK_QTREE_H #define _LINUX_DQBLK_QTREE_H #include <linux/types.h> /* Numbers of blocks needed for updates - we count with the smallest * possible block size (1024) */ #define QTREE_INIT_ALLOC 4 #define QTREE_INIT_REWRITE 2 #define QTREE_DEL_ALLOC 0 #define QTREE_DEL_REWRITE 6 struct dquot; struct kqid; /* Operations */ struct qtree_fmt_operations { void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */ void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */ int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? */ }; /* Inmemory copy of version specific information */ struct qtree_mem_dqinfo { struct super_block *dqi_sb; /* Sb quota is on */ int dqi_type; /* Quota type */ unsigned int dqi_blocks; /* # of blocks in quota file */ unsigned int dqi_free_blk; /* First block in list of free blocks */ unsigned int dqi_free_entry; /* First block with free entry */ unsigned int dqi_blocksize_bits; /* Block size of quota file */ unsigned int dqi_entry_size; /* Size of quota entry in quota file */ unsigned int dqi_usable_bs; /* Space usable in block for quota data */ unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */ const struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */ }; int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk); static inline int qtree_depth(struct qtree_mem_dqinfo *info) { unsigned int epb = info->dqi_usable_bs >> 2; unsigned long long entries = epb; int i; for (i = 1; entries < (1ULL << 32); i++) entries *= epb; return i; } int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid); #endif /* _LINUX_DQBLK_QTREE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNETLINK_H #define __NET_RTNETLINK_H #include <linux/rtnetlink.h> #include <net/netlink.h> typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family; else return AF_UNSPEC; } /** * struct rtnl_link_ops - rtnetlink link operations * * @list: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters * @priv_size: sizeof net_device private space * @setup: net_device setup function * @newlink: Function for configuring and registering a new device * @changelink: Function for changing parameters of an existing device * @dellink: Function to remove a device * @get_size: Function to calculate required room for dumping device * specific netlink attributes * @fill_info: Function to dump device specific netlink attributes * @get_xstats_size: Function to calculate required room for dumping device * specific statistics * @fill_xstats: Function to dump device specific statistics * @get_num_tx_queues: Function to determine number of transmit queues * to create when creating a new device. * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. * @get_link_net: Function to get the i/o netns of the device * @get_linkxstats_size: Function to calculate the required room for * dumping device-specific extended link stats * @fill_linkxstats: Function to dump device-specific extended link stats */ struct rtnl_link_ops { struct list_head list; const char *kind; size_t priv_size; void (*setup)(struct net_device *dev); bool netns_refund; unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); int (*newlink)(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); int (*changelink)(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); void (*dellink)(struct net_device *dev, struct list_head *head); size_t (*get_size)(const struct net_device *dev); int (*fill_info)(struct sk_buff *skb, const struct net_device *dev); size_t (*get_xstats_size)(const struct net_device *dev); int (*fill_xstats)(struct sk_buff *skb, const struct net_device *dev); unsigned int (*get_num_tx_queues)(void); unsigned int (*get_num_rx_queues)(void); unsigned int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); size_t (*get_slave_size)(const struct net_device *dev, const struct net_device *slave_dev); int (*fill_slave_info)(struct sk_buff *skb, const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); size_t (*get_linkxstats_size)(const struct net_device *dev, int attr); int (*fill_linkxstats)(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); void __rtnl_link_unregister(struct rtnl_link_ops *ops); int rtnl_link_register(struct rtnl_link_ops *ops); void rtnl_link_unregister(struct rtnl_link_ops *ops); /** * struct rtnl_af_ops - rtnetlink address family operations * * @list: Used internally * @family: Address family * @fill_link_af: Function to fill IFLA_AF_SPEC with address family * specific netlink attributes. * @get_link_af_size: Function to calculate size of address family specific * netlink attributes. * @validate_link_af: Validate a IFLA_AF_SPEC attribute, must check attr * for invalid configuration settings. * @set_link_af: Function to parse a IFLA_AF_SPEC attribute and modify * net_device accordingly. */ struct rtnl_af_ops { struct list_head list; int family; int (*fill_link_af)(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask); size_t (*get_link_af_size)(const struct net_device *dev, u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, const struct nlattr *attr); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr); int (*fill_stats_af)(struct sk_buff *skb, const struct net_device *dev); size_t (*get_stats_af_size)(const struct net_device *dev); }; void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[], struct netlink_ext_ack *extack); int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) #endif
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PAGE_64_H #define _ASM_X86_PAGE_64_H #include <asm/page_64_types.h> #ifndef __ASSEMBLY__ #include <asm/alternative.h> /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; /* use the carry flag to determine if x was < __START_KERNEL_map */ x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); return x; } #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) #define __phys_addr_symbol(x) \ ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif #define __phys_reloc_hide(x) (x) #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < max_pfn) #endif void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); static inline void clear_page(void *page) { alternative_call_2(clear_page_orig, clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), "0" (page) : "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 #endif #endif /* _ASM_X86_PAGE_64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <linux/seq_file.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * Lock subclasses for tty locks * * TTY_LOCK_NORMAL is for normal ttys and master ptys. * TTY_LOCK_SLAVE is for slave ptys only. * * Lock subclasses are necessary for handling nested locking with pty pairs. * tty locks which use nested locking: * * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. * The stable lock order is master pty first, then slave pty. * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. * Subclassing this lock enables the slave pty to hold its * termios_rwsem when claiming the master tty_buffer lock. * tty_buffer lock - slave ptys can claim nested buffer lock when handling * signal chars. The stable lock order is slave pty, then * master. */ enum { TTY_LOCK_NORMAL = 0, TTY_LOCK_SLAVE, }; /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) */ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' struct tty_buffer { union { struct tty_buffer *next; struct llist_node free; }; int used; int size; int commit; int read; int flags; /* Data points here */ unsigned long data[]; }; /* Values for .flags field of tty_buffer */ #define TTYB_NORMAL 1 /* buffer has no flags buffer */ static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) { return ((unsigned char *)b->data) + ofs; } static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) { return (char *)char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ atomic_t mem_used; /* In-use buffers excluding free list */ int mem_limit; struct tty_buffer *tail; /* Active buffer */ }; /* * When a break, frame error, or parity error happens, these codes are * stuffed into the flags buffer. */ #define TTY_NORMAL 0 #define TTY_BREAK 1 #define TTY_FRAME 2 #define TTY_PARITY 3 #define TTY_OVERRUN 4 #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) #define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) _L_FLAG((tty), ECHOE) #define L_ECHOK(tty) _L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; /* * Port level information. Each device keeps its own port level information * so provide a common structure for those ports wanting to use common support * routines. * * The tty port has a different lifetime to the tty so must be kept apart. * In addition be careful as tty -> port mappings are valid for the life * of the tty object but in many cases port -> tty mappings are valid only * until a hangup so don't use the wrong path. */ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); /* Control the DTR line */ void (*dtr_rts)(struct tty_port *port, int raise); /* Called when the last close completes or a hangup finishes IFF the port was initialized. Do not use to free resources. Called under the port mutex to serialize against activate/shutdowns */ void (*shutdown)(struct tty_port *port); /* Called under the port mutex from tty_port_open, serialized using the port mutex */ /* FIXME: long term getting the tty argument *out* of this would be good for consoles */ int (*activate)(struct tty_port *port, struct tty_struct *tty); /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; struct tty_port_client_operations { int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); void (*write_wakeup)(struct tty_port *port); }; extern const struct tty_port_client_operations tty_port_default_client_ops; struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ struct tty_struct *itty; /* internal back ptr */ const struct tty_port_operations *ops; /* Port operations */ const struct tty_port_client_operations *client_ops; /* Port client operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ unsigned long flags; /* User TTY flags ASYNC_ */ unsigned long iflags; /* Internal flags TTY_PORT_ */ unsigned char console:1, /* port is a console */ low_latency:1; /* optional: tune for latency */ struct mutex mutex; /* Locking */ struct mutex buf_mutex; /* Buffer alloc lock */ unsigned char *xmit_buf; /* Optional buffer */ unsigned int close_delay; /* Close port delay */ unsigned int closing_wait; /* Delay for output */ int drain_delay; /* Set to zero if no pure time based drain is needed else set to size of fifo */ struct kref kref; /* Ref counter */ void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ #define TTY_PORT_INITIALIZED 0 /* device is initialized */ #define TTY_PORT_SUSPENDED 1 /* device is suspended */ #define TTY_PORT_ACTIVE 2 /* device is open */ /* * uart drivers: use the uart_port::status field and the UPSTAT_* defines * for s/w-based flow control steering and carrier detection status */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ #define TTY_PORT_KOPENED 5 /* device exclusively opened by kernel */ /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty * has been closed --- for things like the baud rate, etc --- it is * not stored here, but rather a pointer to the real state is stored * here. Possible the winsize structure should have the same * treatment, but (1) the default 80x24 is usually right and (2) it's * most often used by a windowing system, which will set the correct * size each time the window is created or resized anyway. * - TYT, 9/14/92 */ struct tty_operations; struct tty_struct { int magic; struct kref kref; struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; /* Protects ldisc changes: Lock tty not pty */ struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; spinlock_t ctrl_lock; spinlock_t flow_lock; /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ /* * Writes protected by both ctrl lock and legacy mutex, readers must use * at least one of them. */ struct pid *session; unsigned long flags; int count; struct winsize winsize; /* winsize_mutex */ unsigned long stopped:1, /* flow_lock */ flow_stopped:1, unused:BITS_PER_LONG - 2; int hw_stopped; unsigned long ctrl_status:8, /* ctrl_lock */ packet:1, unused_ctrl:BITS_PER_LONG - 9; unsigned int receive_room; /* Bytes free for queue */ int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; /* protects tty_files list */ struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 int closing; unsigned char *write_buf; int write_cnt; /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct file *file; struct list_head list; }; /* tty magic number */ #define TTY_MAGIC 0x5401 /* * These bits are used in the flags field of the tty structure. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. */ #define TTY_THROTTLED 0 /* Call unthrottle() at threshold min */ #define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ #define TTY_THROTTLE_SAFE 1 #define TTY_UNTHROTTLE_SAFE 2 static inline void __tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; } static inline void tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; smp_mb(); } static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY extern void tty_kref_put(struct tty_struct *tty); extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern void tty_vhangup_self(void); extern void disassociate_ctty(int priv); extern dev_t tty_devnum(struct tty_struct *tty); extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; extern int vcs_init(void); extern struct class *tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Return a new reference to a tty object. The caller must hold * sufficient locks/counts to ensure that their existing reference cannot * go away */ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int __tty_check_change(struct tty_struct *tty, int sig); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); extern int tty_unregister_driver(struct tty_driver *driver); extern struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); extern struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_unregister_device(struct tty_driver *driver, unsigned index); extern void tty_write_message(struct tty_struct *tty, char *msg); extern int tty_send_xchar(struct tty_struct *tty, char ch); extern int tty_put_char(struct tty_struct *tty, unsigned char c); extern int tty_chars_in_buffer(struct tty_struct *tty); extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_throttle_safe(struct tty_struct *tty); extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); extern void session_clear_tty(struct pid *session); extern void no_tty(void); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); extern bool tty_buffer_restart_work(struct tty_port *port); extern bool tty_buffer_cancel_work(struct tty_port *port); extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns the baud rate as an integer for this terminal. The * termios lock must be held by the caller and the terminal bit * flags may be updated. * * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; #define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) extern void tty_port_init(struct tty_port *port); extern void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); extern void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port && kref_get_unless_zero(&port->kref)) return port; return NULL; } /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CTS_FLOW, &port->iflags); else clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline bool tty_port_active(struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline void tty_port_set_active(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_ACTIVE, &port->iflags); else clear_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline bool tty_port_check_carrier(struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CHECK_CD, &port->iflags); else clear_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline bool tty_port_suspended(struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline void tty_port_set_suspended(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_SUSPENDED, &port->iflags); else clear_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline bool tty_port_initialized(struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline void tty_port_set_initialized(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_INITIALIZED, &port->iflags); else clear_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline bool tty_port_kopened(struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } static inline void tty_port_set_kopened(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_KOPENED, &port->iflags); else clear_bit(TTY_PORT_KOPENED, &port->iflags); } extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); extern void tty_port_tty_wakeup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY extern void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern int tty_audit_push(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) { } static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); /* vt.c */ extern int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); extern long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ extern void tty_lock(struct tty_struct *tty); extern int tty_lock_interruptible(struct tty_struct *tty); extern void tty_unlock(struct tty_struct *tty); extern void tty_lock_slave(struct tty_struct *tty); extern void tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); extern void proc_tty_unregister_driver(struct tty_driver *); #else static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif #define tty_msg(fn, tty, f, ...) \ fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) #define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) #define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) #define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) #define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) #define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) #define tty_info_ratelimited(tty, f, ...) \ tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) #endif
1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 // SPDX-License-Identifier: GPL-2.0-only /* * This implements the various checks for CONFIG_HARDENED_USERCOPY*, * which are designed to protect kernel memory from needless exposure * and overwrite under many unintended conditions. This code is based * on PAX_USERCOPY, which is: * * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source * Security Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <linux/atomic.h> #include <linux/jump_label.h> #include <asm/sections.h> /* * Checks if a given pointer and length is contained by the current * stack frame (if possible). * * Returns: * NOT_STACK: not at all on the stack * GOOD_FRAME: fully within a valid stack frame * GOOD_STACK: fully on the stack (when can't do frame-checking) * BAD_STACK: error condition (invalid stack position or bad stack frame) */ static noinline int check_stack_object(const void *obj, unsigned long len) { const void * const stack = task_stack_page(current); const void * const stackend = stack + THREAD_SIZE; int ret; /* Object is not on the stack at all. */ if (obj + len <= stack || stackend <= obj) return NOT_STACK; /* * Reject: object partially overlaps the stack (passing the * check above means at least one end is within the stack, * so if this check fails, the other end is outside the stack). */ if (obj < stack || stackend < obj + len) return BAD_STACK; /* Check if object is safely within a valid frame. */ ret = arch_within_stack_frames(stack, stackend, obj, len); if (ret) return ret; return GOOD_STACK; } /* * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found * an unexpected state during a copy_from_user() or copy_to_user() call. * There are several checks being performed on the buffer by the * __check_object_size() function. Normal stack buffer usage should never * trip the checks, and kernel text addressing will always trip the check. * For cache objects, it is checking that only the whitelisted range of * bytes for a given cache is being accessed (via the cache's usersize and * useroffset fields). To adjust a cache whitelist, use the usercopy-aware * kmem_cache_create_usercopy() function to create the cache (and * carefully audit the whitelist range). */ void usercopy_warn(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) { WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", to_user ? "exposure" : "overwrite", to_user ? "from" : "to", name ? : "unknown?!", detail ? " '" : "", detail ? : "", detail ? "'" : "", offset, len); } void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) { pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", to_user ? "exposure" : "overwrite", to_user ? "from" : "to", name ? : "unknown?!", detail ? " '" : "", detail ? : "", detail ? "'" : "", offset, len); /* * For greater effect, it would be nice to do do_group_exit(), * but BUG() actually hooks all the lock-breaking and per-arch * Oops code, so that is used here instead. */ BUG(); } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ static bool overlaps(const unsigned long ptr, unsigned long n, unsigned long low, unsigned long high) { const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ if (check_low >= high || check_high <= low) return false; return true; } /* Is this address range in the kernel text area? */ static inline void check_kernel_text_object(const unsigned long ptr, unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary * mapping of the kernel text, i.e. there is more than one virtual * kernel address that points to the kernel image. It is usually * when there is a separate linear physical memory mapping, in that * __pa() is not just the reverse of __va(). This can be detected * and checked: */ textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) usercopy_abort("linear kernel text", NULL, to_user, ptr - textlow_linear, n); } static inline void check_bogus_address(const unsigned long ptr, unsigned long n, bool to_user) { /* Reject if object wraps past end of memory. */ if (ptr + (n - 1) < ptr) usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) usercopy_abort("null address", NULL, to_user, ptr, n); } /* Checks for allocs that are marked in some way as spanning multiple pages. */ static inline void check_page_span(const void *ptr, unsigned long n, struct page *page, bool to_user) { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; struct page *endpage; bool is_reserved, is_cma; /* * Sometimes the kernel data regions are not marked Reserved (see * check below). And sometimes [_sdata,_edata) does not cover * rodata and/or bss, so check each range explicitly. */ /* Allow reads of kernel rodata region (if not marked as Reserved). */ if (ptr >= (const void *)__start_rodata && end <= (const void *)__end_rodata) { if (!to_user) usercopy_abort("rodata", NULL, to_user, 0, n); return; } /* Allow kernel data region (if not marked as Reserved). */ if (ptr >= (const void *)_sdata && end <= (const void *)_edata) return; /* Allow kernel bss region (if not marked as Reserved). */ if (ptr >= (const void *)__bss_start && end <= (const void *)__bss_stop) return; /* Is the object wholly within one base page? */ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == ((unsigned long)end & (unsigned long)PAGE_MASK))) return; /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) return; /* * Reject if range is entirely either Reserved (i.e. special or * device memory), or CMA. Otherwise, reject since the object spans * several independently allocated pages. */ is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) usercopy_abort("spans multiple pages", NULL, to_user, 0, n); for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) usercopy_abort("spans Reserved and non-Reserved pages", NULL, to_user, 0, n); if (is_cma && !is_migrate_cma_page(page)) usercopy_abort("spans CMA and non-CMA pages", NULL, to_user, 0, n); } #endif } static inline void check_heap_object(const void *ptr, unsigned long n, bool to_user) { struct page *page; if (!virt_addr_valid(ptr)) return; /* * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the * highmem page or fallback to virt_to_page(). The following * is effectively a highmem-aware virt_to_head_page(). */ page = compound_head(kmap_to_page((void *)ptr)); if (PageSlab(page)) { /* Check slab allocator for flags and size. */ __check_heap_object(ptr, n, page, to_user); } else { /* Verify object does not incorrectly span multiple pages. */ check_page_span(ptr, n, page, to_user); } } static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks); /* * Validates that the given object is: * - not bogus address * - fully contained by stack (or stack frame, when available) * - fully within SLAB object (or object whitelist area, when available) * - not in kernel text */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) { if (static_branch_unlikely(&bypass_usercopy_checks)) return; /* Skip all tests if size is zero. */ if (!n) return; /* Check for invalid addresses. */ check_bogus_address((const unsigned long)ptr, n, to_user); /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { case NOT_STACK: /* Object is not touching the current process stack. */ break; case GOOD_FRAME: case GOOD_STACK: /* * Object is either in the correct frame (when it * is possible to check) or just generally on the * process stack (when frame checking not available). */ return; default: usercopy_abort("process stack", NULL, to_user, 0, n); } /* Check for bad heap object. */ check_heap_object(ptr, n, to_user); /* Check for object in kernel to avoid text exposure. */ check_kernel_text_object((const unsigned long)ptr, n, to_user); } EXPORT_SYMBOL(__check_object_size); static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { return strtobool(str, &enable_checks); } __setup("hardened_usercopy=", parse_hardened_usercopy); static int __init set_hardened_usercopy(void) { if (enable_checks == false) static_branch_enable(&bypass_usercopy_checks); return 1; } late_initcall(set_hardened_usercopy);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/pagevec.h * * In many places it is efficient to batch an operation up against multiple * pages. A pagevec is a multipage container which is used for that. */ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H #include <linux/xarray.h> /* 15 pointers + header align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 15 struct page; struct address_space; struct pagevec { unsigned char nr; bool percpu_pvec_drained; struct page *pages[PAGEVEC_SIZE]; }; void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); unsigned pagevec_lookup_entries(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_entries, pgoff_t *indices); void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); static inline unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start) { return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; pvec->percpu_pvec_drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) { pvec->nr = 0; } static inline unsigned pagevec_count(struct pagevec *pvec) { return pvec->nr; } static inline unsigned pagevec_space(struct pagevec *pvec) { return PAGEVEC_SIZE - pvec->nr; } /* * Add a page to a pagevec. Returns the number of slots still available. */ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page) { pvec->pages[pvec->nr++] = page; return pagevec_space(pvec); } static inline void pagevec_release(struct pagevec *pvec) { if (pagevec_count(pvec)) __pagevec_release(pvec); } #endif /* _LINUX_PAGEVEC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ #include <linux/rhashtable-types.h> #include <linux/completion.h> /* Per netns frag queues directory */ struct fqdir { /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; struct net *net; bool dead; struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; struct work_struct destroy_work; }; /** * fragment queue flags * * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), INET_FRAG_HASH_DEAD = BIT(3), }; struct frag_v4_compare_key { __be32 saddr; __be32 daddr; u32 user; u32 vif; __be16 id; u16 protocol; }; struct frag_v6_compare_key { struct in6_addr saddr; struct in6_addr daddr; u32 user; __be32 id; u32 iif; }; /** * struct inet_frag_queue - fragment queue * * @node: rhash node * @key: keys identifying this frag. * @timer: queue expiration timer * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @rb_fragments: received fragments rb-tree root * @fragments_tail: received fragments tail * @last_run_head: the head of the last "run". see ip_fragment.c * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { struct rhash_head node; union { struct frag_v4_compare_key v4; struct frag_v6_compare_key v6; } key; struct timer_list timer; spinlock_t lock; refcount_t refcnt; struct rb_root rb_fragments; struct sk_buff *fragments_tail; struct sk_buff *last_run_head; ktime_t stamp; int len; int meat; __u8 flags; u16 max_size; struct fqdir *fqdir; struct rcu_head rcu; }; struct inet_frags { unsigned int qsize; void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; struct rhashtable_params rhash_params; refcount_t refcnt; struct completion completion; }; int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { fqdir->high_thresh = 0; /* prevent creation of new frags */ fqdir->dead = true; } void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ unsigned int inet_frag_rbtree_purge(struct rb_root *root); static inline void inet_frag_put(struct inet_frag_queue *q) { if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q); } /* Memory Tracking Functions. */ static inline long frag_mem_limit(const struct fqdir *fqdir) { return atomic_long_read(&fqdir->mem); } static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val) { atomic_long_sub(val, &fqdir->mem); } static inline void add_frag_mem_limit(struct fqdir *fqdir, long val) { atomic_long_add(val, &fqdir->mem); } /* RFC 3168 support : * We want to check ECN values of all fragments, do detect invalid combinations. * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. */ #define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ #define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ #define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ #define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ extern const u8 ip_frag_ecn_table[16]; /* Return values of inet_frag_queue_insert() */ #define IPFRAG_OK 0 #define IPFRAG_DUP 1 #define IPFRAG_OVERLAP 2 int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, int offset, int end); void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); #endif
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/core.c - core driver model code (device registration, etc) * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2006 Novell, Inc. */ #include <linux/acpi.h> #include <linux/cpufreq.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/kdev_t.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/genhd.h> #include <linux/mutex.h> #include <linux/pm_runtime.h> #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/sysfs.h> #include "base.h" #include "power/power.h" #ifdef CONFIG_SYSFS_DEPRECATED #ifdef CONFIG_SYSFS_DEPRECATED_V2 long sysfs_deprecated = 1; #else long sysfs_deprecated = 0; #endif static int __init sysfs_deprecated_setup(char *arg) { return kstrtol(arg, 10, &sysfs_deprecated); } early_param("sysfs.deprecated", sysfs_deprecated_setup); #endif /* Device links support. */ static LIST_HEAD(wait_for_suppliers); static DEFINE_MUTEX(wfs_lock); static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static unsigned int defer_fw_devlink_count; static LIST_HEAD(deferred_fw_devlink); static DEFINE_MUTEX(defer_fw_devlink_lock); static bool fw_devlink_is_permissive(void); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); static inline void device_links_write_lock(void) { mutex_lock(&device_links_lock); } static inline void device_links_write_unlock(void) { mutex_unlock(&device_links_lock); } int device_links_read_lock(void) __acquires(&device_links_srcu) { return srcu_read_lock(&device_links_srcu); } void device_links_read_unlock(int idx) __releases(&device_links_srcu) { srcu_read_unlock(&device_links_srcu, idx); } int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); static inline void device_links_write_lock(void) { down_write(&device_links_lock); } static inline void device_links_write_unlock(void) { up_write(&device_links_lock); } int device_links_read_lock(void) { down_read(&device_links_lock); return 0; } void device_links_read_unlock(int not_used) { up_read(&device_links_lock); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int device_links_read_lock_held(void) { return lockdep_is_held(&device_links_lock); } #endif static inline void device_link_synchronize_removal(void) { } #endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) { while (target->parent) { target = target->parent; if (dev == target) return true; } return false; } /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. * @target: Device to check against. * * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; /* * The "ancestors" check is needed to catch the case when the target * device has not been completely initialized yet and it is still * missing from the list of children of its parent device. */ if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); if (ret) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; if (link->consumer == target) return 1; ret = device_is_dependent(link->consumer, target); if (ret) break; } return ret; } static void device_link_init_status(struct device_link *link, struct device *consumer, struct device *supplier) { switch (supplier->links.status) { case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* * A consumer driver can create a link to a supplier * that has not completed its probing yet as long as it * knows that the supplier is already functional (for * example, it has just acquired some resources from the * supplier). */ link->status = DL_STATE_CONSUMER_PROBE; break; default: link->status = DL_STATE_DORMANT; break; } break; case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: link->status = DL_STATE_ACTIVE; break; default: link->status = DL_STATE_AVAILABLE; break; } break; case DL_DEV_UNBINDING: link->status = DL_STATE_SUPPLIER_UNBIND; break; default: link->status = DL_STATE_DORMANT; break; } } static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; /* * Devices that have not been registered yet will be put to the ends * of the lists during the registration, so skip them here. */ if (device_is_registered(dev)) devices_kset_move_last(dev); if (device_pm_initialized(dev)) device_pm_move_last(dev); device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; device_reorder_to_tail(link->consumer, NULL); } return 0; } /** * device_pm_move_to_tail - Move set of devices to the end of device lists * @dev: Device to move * * This is a device_reorder_to_tail() wrapper taking the requisite locks. * * It moves the @dev along with all of its children and all of its consumers * to the ends of the device_kset and dpm_list, recursively. */ void device_pm_move_to_tail(struct device *dev) { int idx; idx = device_links_read_lock(); device_pm_lock(); device_reorder_to_tail(dev, NULL); device_pm_unlock(); device_links_read_unlock(idx); } #define to_devlink(dev) container_of((dev), struct device_link, link_dev) static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; switch (to_devlink(dev)->status) { case DL_STATE_NONE: output = "not tracked"; break; case DL_STATE_DORMANT: output = "dormant"; break; case DL_STATE_AVAILABLE: output = "available"; break; case DL_STATE_CONSUMER_PROBE: output = "consumer probing"; break; case DL_STATE_ACTIVE: output = "active"; break; case DL_STATE_SUPPLIER_UNBIND: output = "supplier unbinding"; break; default: output = "unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(status); static ssize_t auto_remove_on_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); const char *output; if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) output = "supplier unbind"; else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) output = "consumer unbind"; else output = "never"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(auto_remove_on); static ssize_t runtime_pm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); static ssize_t sync_state_only_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); static struct attribute *devlink_attrs[] = { &dev_attr_status.attr, &dev_attr_auto_remove_on.attr, &dev_attr_runtime_pm.attr, &dev_attr_sync_state_only.attr, NULL, }; ATTRIBUTE_GROUPS(devlink); static void device_link_release_fn(struct work_struct *work) { struct device_link *link = container_of(work, struct device_link, rm_work); /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); put_device(link->consumer); put_device(link->supplier); kfree(link); } static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); INIT_WORK(&link->rm_work, device_link_release_fn); /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the "long" * workqueue. */ queue_work(system_long_wq, &link->rm_work); } static struct class devlink_class = { .name = "devlink", .owner = THIS_MODULE, .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; static int devlink_add_symlinks(struct device *dev, struct class_interface *class_intf) { int ret; size_t len; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) goto out; ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer"); if (ret) goto err_con; snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); if (ret) goto err_con_dev; snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); if (ret) goto err_sup_dev; goto out; err_sup_dev: snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: kfree(buf); return ret; } static void devlink_remove_symlinks(struct device *dev, struct class_interface *class_intf) { struct device_link *link = to_devlink(dev); size_t len; struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { WARN(1, "Unable to properly free device link symlinks!\n"); return; } if (device_is_registered(con)) { snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); sysfs_remove_link(&con->kobj, buf); } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); } static struct class_interface devlink_class_intf = { .class = &devlink_class, .add_dev = devlink_add_symlinks, .remove_dev = devlink_remove_symlinks, }; static int __init devlink_class_init(void) { int ret; ret = class_register(&devlink_class); if (ret) return ret; ret = class_interface_register(&devlink_class_intf); if (ret) class_unregister(&devlink_class); return ret; } postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @flags: Link flags. * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will * be forced into the active metastate and reference-counted upon the creation * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * * If DL_FLAG_STATELESS is set in @flags, the caller of this function is * expected to release the link returned by it directly with the help of either * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value * can only be used to check whether or not the link is present. In that case, * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link * flags can be used to indicate to the driver core when the link can be safely * deleted. Namely, setting one of them in @flags indicates to the driver core * that the link is not going to be used (by the given caller of this function) * after unbinding the consumer or supplier driver, respectively, from its * device, so the link can be deleted at that point. If none of them is set, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can * be used to request the driver core to automaticall probe for a consmer * driver after successfully binding a driver to the supplier device. * * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at * the same time is invalid and will cause NULL to be returned upfront. * However, if a device link between the given @consumer and @supplier pair * exists already when this function is called for them, the existing link will * be returned regardless of its current type and status (the link's flags may * be modified then). The caller of this function is then expected to treat * the link as though it has just been created, so (in particular) if * DL_FLAG_STATELESS was passed in @flags, the link needs to be released * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). * * The supplier device is required to be registered when this function is called * and NULL will be returned if that is not the case. The consumer device need * not be registered, however. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; if (!consumer || !supplier || consumer == supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_SYNC_STATE_ONLY && flags != DL_FLAG_SYNC_STATE_ONLY) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { if (pm_runtime_get_sync(supplier) < 0) { pm_runtime_put_noidle(supplier); return NULL; } } if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; device_links_write_lock(); device_pm_lock(); /* * If the supplier has not been fully registered yet or there is a * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and * the supplier already in the graph, return NULL. If the link is a * SYNC_STATE_ONLY link, we don't check for reverse dependencies * because it only affects sync_state() callbacks. */ if (!device_pm_initialized(supplier) || (!(flags & DL_FLAG_SYNC_STATE_ONLY) && device_is_dependent(consumer, supplier))) { link = NULL; goto out; } /* * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); } if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(link->flags & DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { link->flags |= DL_FLAG_STATELESS; goto out; } } /* * If the life time of the link following from the new flags is * longer than indicated by the flags of the existing link, * update the existing link to stay around longer. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } if (!(link->flags & DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; } goto out; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; refcount_set(&link->rpm_active, 1); get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); get_device(consumer); link->consumer = consumer; INIT_LIST_HEAD(&link->c_node); link->flags = flags; kref_init(&link->kref); link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); dev_set_name(&link->link_dev, "%s:%s--%s:%s", dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(&link->link_dev); link = NULL; goto out; } if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); pm_runtime_new_link(consumer); } /* Determine the initial link state. */ if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; else device_link_init_status(link, consumer, supplier); /* * Some callers expect the link creation during consumer driver probe to * resume the supplier even without DL_FLAG_RPM_ACTIVE. */ if (link->status == DL_STATE_CONSUMER_PROBE && flags & DL_FLAG_PM_RUNTIME) pm_runtime_resume(supplier); list_add_tail_rcu(&link->s_node, &supplier->links.consumers); list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); if (flags & DL_FLAG_SYNC_STATE_ONLY) { dev_dbg(consumer, "Linked as a sync state only consumer to %s\n", dev_name(supplier)); goto out; } reorder: /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * * It is necessary to hold dpm_list locked throughout all that or else * we may end up suspending with a wrong ordering of it. */ device_reorder_to_tail(consumer, NULL); dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); out: device_pm_unlock(); device_links_write_unlock(); if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; } EXPORT_SYMBOL_GPL(device_link_add); /** * device_link_wait_for_supplier - Add device to wait_for_suppliers list * @consumer: Consumer device * * Marks the @consumer device as waiting for suppliers to become available by * adding it to the wait_for_suppliers list. The consumer device will never be * probed until it's removed from the wait_for_suppliers list. * * The caller is responsible for adding the links to the supplier devices once * they are available and removing the @consumer device from the * wait_for_suppliers list once links to all the suppliers have been created. * * This function is NOT meant to be called from the probe function of the * consumer but rather from code that creates/adds the consumer device. */ static void device_link_wait_for_supplier(struct device *consumer, bool need_for_probe) { mutex_lock(&wfs_lock); list_add_tail(&consumer->links.needs_suppliers, &wait_for_suppliers); consumer->links.need_for_probe = need_for_probe; mutex_unlock(&wfs_lock); } static void device_link_wait_for_mandatory_supplier(struct device *consumer) { device_link_wait_for_supplier(consumer, true); } static void device_link_wait_for_optional_supplier(struct device *consumer) { device_link_wait_for_supplier(consumer, false); } /** * device_link_add_missing_supplier_links - Add links from consumer devices to * supplier devices, leaving any * consumer with inactive suppliers on * the wait_for_suppliers list * * Loops through all consumers waiting on suppliers and tries to add all their * supplier links. If that succeeds, the consumer device is removed from * wait_for_suppliers list. Otherwise, they are left in the wait_for_suppliers * list. Devices left on the wait_for_suppliers list will not be probed. * * The fwnode add_links callback is expected to return 0 if it has found and * added all the supplier links for the consumer device. It should return an * error if it isn't able to do so. * * The caller of device_link_wait_for_supplier() is expected to call this once * it's aware of potential suppliers becoming available. */ static void device_link_add_missing_supplier_links(void) { struct device *dev, *tmp; mutex_lock(&wfs_lock); list_for_each_entry_safe(dev, tmp, &wait_for_suppliers, links.needs_suppliers) { int ret = fwnode_call_int_op(dev->fwnode, add_links, dev); if (!ret) list_del_init(&dev->links.needs_suppliers); else if (ret != -ENODEV || fw_devlink_is_permissive()) dev->links.need_for_probe = false; } mutex_unlock(&wfs_lock); } #ifdef CONFIG_SRCU static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); device_unregister(&link->link_dev); } #else /* !CONFIG_SRCU */ static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_info(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); list_del(&link->s_node); list_del(&link->c_node); device_unregister(&link->link_dev); } #endif /* !CONFIG_SRCU */ static void device_link_put_kref(struct device_link *link) { if (link->flags & DL_FLAG_STATELESS) kref_put(&link->kref, __device_link_del); else WARN(1, "Unable to drop a managed device link reference\n"); } /** * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime * PM. If the link was added multiple times, it needs to be deleted as often. * Care is required for hotplugged devices: Their links are purged on removal * and calling device_link_del() is then no longer allowed. */ void device_link_del(struct device_link *link) { device_links_write_lock(); device_link_put_kref(link); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); /** * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * * The caller must ensure proper synchronization of this function with runtime * PM. */ void device_link_remove(void *consumer, struct device *supplier) { struct device_link *link; if (WARN_ON(consumer == supplier)) return; device_links_write_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { device_link_put_kref(link); break; } } device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); static void device_links_missing_supplier(struct device *dev) { struct device_link *link; list_for_each_entry(link, &dev->links.suppliers, c_node) { if (link->status != DL_STATE_CONSUMER_PROBE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. * * Check links from this device to any suppliers. Walk the list of the device's * links to suppliers and see if all of them are available. If not, simply * return -EPROBE_DEFER. * * We need to guarantee that the supplier will not go away after the check has * been positive here. It only can go away in __device_release_driver() and * that function checks the device's links to consumers. This means we need to * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { struct device_link *link; int ret = 0; /* * Device waiting for supplier to become available is not allowed to * probe. */ mutex_lock(&wfs_lock); if (!list_empty(&dev->links.needs_suppliers) && dev->links.need_for_probe) { mutex_unlock(&wfs_lock); return -EPROBE_DEFER; } mutex_unlock(&wfs_lock); device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { device_links_missing_supplier(dev); ret = -EPROBE_DEFER; break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); return ret; } /** * __device_links_queue_sync_state - Queue a device for sync_state() callback * @dev: Device to call sync_state() on * @list: List head to queue the @dev on * * Queues a device for a sync_state() callback when the device links write lock * isn't held. This allows the sync_state() execution flow to use device links * APIs. The caller must ensure this function is called with * device_links_write_lock() held. * * This function does a get_device() to make sure the device is not freed while * on this list. * * So the caller must also ensure that device_links_flush_sync_list() is called * as soon as the caller releases device_links_write_lock(). This is necessary * to make sure the sync_state() is called in a timely fashion and the * put_device() is called on this device. */ static void __device_links_queue_sync_state(struct device *dev, struct list_head *list) { struct device_link *link; if (!dev_has_sync_state(dev)) return; if (dev->state_synced) return; list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; } /* * Set the flag here to avoid adding the same device to a list more * than once. This can happen if new consumers get added to the device * and probed before the list is flushed. */ dev->state_synced = true; if (WARN_ON(!list_empty(&dev->links.defer_hook))) return; get_device(dev); list_add_tail(&dev->links.defer_hook, list); } /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This * function is used in conjunction with __device_links_queue_sync_state(). The * @dont_lock_dev parameter is useful when this function is called from a * context where a device lock is already held. */ static void device_links_flush_sync_list(struct list_head *list, struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_hook) { list_del_init(&dev->links.defer_hook); if (dev != dont_lock_dev) device_lock(dev); if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); if (dev != dont_lock_dev) device_unlock(dev); put_device(dev); } } void device_links_supplier_sync_state_pause(void) { device_links_write_lock(); defer_sync_state_count++; device_links_write_unlock(); } void device_links_supplier_sync_state_resume(void) { struct device *dev, *tmp; LIST_HEAD(sync_list); device_links_write_lock(); if (!defer_sync_state_count) { WARN(true, "Unmatched sync_state pause/resume!"); goto out; } defer_sync_state_count--; if (defer_sync_state_count) goto out; list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_hook) { /* * Delete from deferred_sync list before queuing it to * sync_list because defer_hook is used for both lists. */ list_del_init(&dev->links.defer_hook); __device_links_queue_sync_state(dev, &sync_list); } out: device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) { device_links_supplier_sync_state_resume(); return 0; } late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { if (list_empty(&sup->links.defer_hook) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_hook, &deferred_sync); } static void device_link_drop_managed(struct device_link *link) { link->flags &= ~DL_FLAG_MANAGED; WRITE_ONCE(link->status, DL_STATE_NONE); kref_put(&link->kref, __device_link_del); } static ssize_t waiting_for_supplier_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); mutex_lock(&wfs_lock); val = !list_empty(&dev->links.needs_suppliers) && dev->links.need_for_probe; mutex_unlock(&wfs_lock); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static DEVICE_ATTR_RO(waiting_for_supplier); /** * device_links_driver_bound - Update device links after probing its driver. * @dev: Device to update the links for. * * The probe has been successful, so update links from this device to any * consumers by changing their status to "available". * * Also change the status of @dev's links to suppliers to "active". * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_bound(struct device *dev) { struct device_link *link, *ln; LIST_HEAD(sync_list); /* * If a device probes successfully, it's expected to have created all * the device links it needs to or make new device links as it needs * them. So, it no longer needs to wait on any suppliers. */ mutex_lock(&wfs_lock); list_del_init(&dev->links.needs_suppliers); mutex_unlock(&wfs_lock); device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * Links created during consumer probe may be in the "consumer * probe" state to start with if the supplier is still probing * when they are created and they may become "active" if the * consumer probe returns first. Skip them here. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) continue; WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) driver_deferred_probe_add(link->consumer); } if (defer_sync_state_count) __device_links_supplier_defer_sync(dev); else __device_links_queue_sync_state(dev, &sync_list); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { struct device *supplier; if (!(link->flags & DL_FLAG_MANAGED)) continue; supplier = link->supplier; if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's * save to drop the managed link completely. */ device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); } /* * This needs to be done even for the deleted * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last * device link that was preventing the supplier from getting a * sync_state() call. */ if (defer_sync_state_count) __device_links_supplier_defer_sync(supplier); else __device_links_queue_sync_state(supplier, &sync_list); } dev->links.status = DL_DEV_DRIVER_BOUND; device_links_write_unlock(); device_links_flush_sync_list(&sync_list, dev); } /** * __device_links_no_driver - Update links of a device without a driver. * @dev: Device without a drvier. * * Delete all non-persistent links from this device to any suppliers. * * Persistent links stay around, but their status is changed to "available", * unless they already are in the "supplier unbind in progress" state in which * case they need not be updated. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ static void __device_links_no_driver(struct device *dev) { struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { device_link_drop_managed(link); continue; } if (link->status != DL_STATE_CONSUMER_PROBE && link->status != DL_STATE_ACTIVE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } dev->links.status = DL_DEV_NO_DRIVER; } /** * device_links_no_driver - Update links after failing driver probe. * @dev: Device whose driver has just failed to probe. * * Clean up leftover links to consumers for @dev and invoke * %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_no_driver(struct device *dev) { struct device_link *link; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * The probe has failed, so if the status of the link is * "consumer probe" or "active", it must have been added by * a probing consumer while this device was still probing. * Change its state to "dormant", as it represents a valid * relationship, but it is not functionally meaningful. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_DORMANT); } __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_driver_cleanup - Update links after driver removal. * @dev: Device whose driver has just gone away. * * Update links to consumers for @dev by changing their status to "dormant" and * invoke %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_cleanup(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* * autoremove the links between this @dev and its consumer * devices that are not active, i.e. where the link state * has moved to DL_STATE_SUPPLIER_UNBIND. */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); } list_del_init(&dev->links.defer_hook); __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_busy - Check if there are any busy links to consumers. * @dev: Device to check. * * Check each consumer of the device and return 'true' if its link's status * is one of "consumer probe" or "active" (meaning that the given consumer is * probing right now or its driver is present). Otherwise, change the link * state to "supplier unbind" to prevent the consumer from being probed * successfully going forward. * * Return 'false' if there are no probing or active consumers. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ bool device_links_busy(struct device *dev) { struct device_link *link; bool ret = false; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) { ret = true; break; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); } dev->links.status = DL_DEV_UNBINDING; device_links_write_unlock(); return ret; } /** * device_links_unbind_consumers - Force unbind consumers of the given device. * @dev: Device to unbind the consumers of. * * Walk the list of links to consumers for @dev and if any of them is in the * "consumer probe" state, wait for all device probes in progress to complete * and start over. * * If that's not the case, change the status of the link to "supplier unbind" * and check if the link was in the "active" state. If so, force the consumer * driver to unbind and start over (the consumer will not re-probe as we have * changed the state of the link already). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_unbind_consumers(struct device *dev) { struct device_link *link; start: device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; if (!(link->flags & DL_FLAG_MANAGED) || link->flags & DL_FLAG_SYNC_STATE_ONLY) continue; status = link->status; if (status == DL_STATE_CONSUMER_PROBE) { device_links_write_unlock(); wait_for_device_probe(); goto start; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); if (status == DL_STATE_ACTIVE) { struct device *consumer = link->consumer; get_device(consumer); device_links_write_unlock(); device_release_driver_internal(consumer, NULL, consumer->parent); put_device(consumer); goto start; } } device_links_write_unlock(); } /** * device_links_purge - Delete existing links to other devices. * @dev: Target device. */ static void device_links_purge(struct device *dev) { struct device_link *link, *ln; if (dev->class == &devlink_class) return; mutex_lock(&wfs_lock); list_del_init(&dev->links.needs_suppliers); mutex_unlock(&wfs_lock); /* * Delete all of the remaining links from this device to any other * devices (either consumers or suppliers). */ device_links_write_lock(); list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { WARN_ON(link->status == DL_STATE_ACTIVE); __device_link_del(&link->kref); } list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { WARN_ON(link->status != DL_STATE_DORMANT && link->status != DL_STATE_NONE); __device_link_del(&link->kref); } device_links_write_unlock(); } static u32 fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; static int __init fw_devlink_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "off") == 0) { fw_devlink_flags = 0; } else if (strcmp(arg, "permissive") == 0) { fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; } else if (strcmp(arg, "on") == 0) { fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER; } else if (strcmp(arg, "rpm") == 0) { fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER | DL_FLAG_PM_RUNTIME; } return 0; } early_param("fw_devlink", fw_devlink_setup); u32 fw_devlink_get_flags(void) { return fw_devlink_flags; } static bool fw_devlink_is_permissive(void) { return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY; } static void fw_devlink_link_device(struct device *dev) { int fw_ret; if (!fw_devlink_flags) return; mutex_lock(&defer_fw_devlink_lock); if (!defer_fw_devlink_count) device_link_add_missing_supplier_links(); /* * The device's fwnode not having add_links() doesn't affect if other * consumers can find this device as a supplier. So, this check is * intentionally placed after device_link_add_missing_supplier_links(). */ if (!fwnode_has_op(dev->fwnode, add_links)) goto out; /* * If fw_devlink is being deferred, assume all devices have mandatory * suppliers they need to link to later. Then, when the fw_devlink is * resumed, all these devices will get a chance to try and link to any * suppliers they have. */ if (!defer_fw_devlink_count) { fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev); if (fw_ret == -ENODEV && fw_devlink_is_permissive()) fw_ret = -EAGAIN; } else { fw_ret = -ENODEV; /* * defer_hook is not used to add device to deferred_sync list * until device is bound. Since deferred fw devlink also blocks * probing, same list hook can be used for deferred_fw_devlink. */ list_add_tail(&dev->links.defer_hook, &deferred_fw_devlink); } if (fw_ret == -ENODEV) device_link_wait_for_mandatory_supplier(dev); else if (fw_ret) device_link_wait_for_optional_supplier(dev); out: mutex_unlock(&defer_fw_devlink_lock); } /** * fw_devlink_pause - Pause parsing of fwnode to create device links * * Calling this function defers any fwnode parsing to create device links until * fw_devlink_resume() is called. Both these functions are ref counted and the * caller needs to match the calls. * * While fw_devlink is paused: * - Any device that is added won't have its fwnode parsed to create device * links. * - The probe of the device will also be deferred during this period. * - Any devices that were already added, but waiting for suppliers won't be * able to link to newly added devices. * * Once fw_devlink_resume(): * - All the fwnodes that was not parsed will be parsed. * - All the devices that were deferred probing will be reattempted if they * aren't waiting for any more suppliers. * * This pair of functions, is mainly meant to optimize the parsing of fwnodes * when a lot of devices that need to link to each other are added in a short * interval of time. For example, adding all the top level devices in a system. * * For example, if N devices are added and: * - All the consumers are added before their suppliers * - All the suppliers of the N devices are part of the N devices * * Then: * * - With the use of fw_devlink_pause() and fw_devlink_resume(), each device * will only need one parsing of its fwnode because it is guaranteed to find * all the supplier devices already registered and ready to link to. It won't * have to do another pass later to find one or more suppliers it couldn't * find in the first parse of the fwnode. So, we'll only need O(N) fwnode * parses. * * - Without the use of fw_devlink_pause() and fw_devlink_resume(), we would * end up doing O(N^2) parses of fwnodes because every device that's added is * guaranteed to trigger a parse of the fwnode of every device added before * it. This O(N^2) parse is made worse by the fact that when a fwnode of a * device is parsed, all it descendant devices might need to have their * fwnodes parsed too (even if the devices themselves aren't added). */ void fw_devlink_pause(void) { mutex_lock(&defer_fw_devlink_lock); defer_fw_devlink_count++; mutex_unlock(&defer_fw_devlink_lock); } /** fw_devlink_resume - Resume parsing of fwnode to create device links * * This function is used in conjunction with fw_devlink_pause() and is ref * counted. See documentation for fw_devlink_pause() for more details. */ void fw_devlink_resume(void) { struct device *dev, *tmp; LIST_HEAD(probe_list); mutex_lock(&defer_fw_devlink_lock); if (!defer_fw_devlink_count) { WARN(true, "Unmatched fw_devlink pause/resume!"); goto out; } defer_fw_devlink_count--; if (defer_fw_devlink_count) goto out; device_link_add_missing_supplier_links(); list_splice_tail_init(&deferred_fw_devlink, &probe_list); out: mutex_unlock(&defer_fw_devlink_lock); /* * bus_probe_device() can cause new devices to get added and they'll * try to grab defer_fw_devlink_lock. So, this needs to be done outside * the defer_fw_devlink_lock. */ list_for_each_entry_safe(dev, tmp, &probe_list, links.defer_hook) { list_del_init(&dev->links.defer_hook); bus_probe_device(dev); } } /* Device links support end. */ int (*platform_notify)(struct device *dev) = NULL; int (*platform_notify_remove)(struct device *dev) = NULL; static struct kobject *dev_kobj; struct kobject *sysfs_dev_char_kobj; struct kobject *sysfs_dev_block_kobj; static DEFINE_MUTEX(device_hotplug_lock); void lock_device_hotplug(void) { mutex_lock(&device_hotplug_lock); } void unlock_device_hotplug(void) { mutex_unlock(&device_hotplug_lock); } int lock_device_hotplug_sysfs(void) { if (mutex_trylock(&device_hotplug_lock)) return 0; /* Avoid busy looping (5 ms of sleep should do). */ msleep(5); return restart_syscall(); } #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { return !(dev->type == &part_type); } #else static inline int device_is_not_partition(struct device *dev) { return 1; } #endif static int device_platform_notify(struct device *dev, enum kobject_action action) { int ret; ret = acpi_platform_notify(dev, action); if (ret) return ret; ret = software_node_notify(dev, action); if (ret) return ret; if (platform_notify && action == KOBJ_ADD) platform_notify(dev); else if (platform_notify_remove && action == KOBJ_REMOVE) platform_notify_remove(dev); return 0; } /** * dev_driver_string - Return a device's driver name, if at all possible * @dev: struct device to get the name of * * Will return the device's driver's name if it is bound to a device. If * the device is not bound to a driver, it will return the name of the bus * it is attached to. If it is not attached to a bus either, an empty * string will be returned. */ const char *dev_driver_string(const struct device *dev) { struct device_driver *drv; /* dev->driver can change to NULL underneath us because of unbinding, * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); return drv ? drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->show) ret = dev_attr->show(dev, dev_attr, buf); if (ret >= (ssize_t)PAGE_SIZE) { printk("dev_attr_show: %pS returned bad count\n", dev_attr->show); } return ret; } static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->store) ret = dev_attr->store(dev, dev_attr, buf, count); return ret; } static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; #define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; unsigned long new; ret = kstrtoul(buf, 0, &new); if (ret) return ret; *(unsigned long *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_ulong); ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; long new; ret = kstrtol(buf, 0, &new); if (ret) return ret; if (new > INT_MAX || new < INT_MIN) return -EINVAL; *(int *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_int); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); if (strtobool(buf, ea->var) < 0) return -EINVAL; return size; } EXPORT_SYMBOL_GPL(device_store_bool); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); /** * device_release - free device structure. * @kobj: device's kobject. * * This is called once the reference count for the object * reaches 0. We forward the call to the device's release * method, which should handle actually freeing the structure. */ static void device_release(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); struct device_private *p = dev->p; /* * Some platform devices are driven without driver attached * and managed resources may have been acquired. Make sure * all resources are released. * * Drivers still can add resources into device after device * is deleted but alive, so release devres here to avoid * possible memory leak. */ devres_release_all(dev); kfree(dev->dma_range_map); if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) dev->type->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", dev_name(dev)); kfree(p); } static const void *device_namespace(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; if (dev->class && dev->class->ns_type) ns = dev->class->namespace(dev); return ns; } static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) { struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); } static struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, .namespace = device_namespace, .get_ownership = device_get_ownership, }; static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) return 1; } return 0; } static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; if (dev->class) return dev->class->name; return NULL; } static int dev_uevent(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env) { struct device *dev = kobj_to_dev(kobj); int retval = 0; /* add device node properties if present */ if (MAJOR(dev->devt)) { const char *tmp; const char *name; umode_t mode = 0; kuid_t uid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); if (name) { add_uevent_var(env, "DEVNAME=%s", name); if (mode) add_uevent_var(env, "DEVMODE=%#o", mode & 0777); if (!uid_eq(uid, GLOBAL_ROOT_UID)) add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); if (!gid_eq(gid, GLOBAL_ROOT_GID)) add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); kfree(tmp); } } if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); if (dev->driver) add_uevent_var(env, "DRIVER=%s", dev->driver->name); /* Add common DT information about the device */ of_device_uevent(dev, env); /* have the bus specific function add its stuff */ if (dev->bus && dev->bus->uevent) { retval = dev->bus->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: bus uevent() returned %d\n", dev_name(dev), __func__, retval); } /* have the class specific function add its stuff */ if (dev->class && dev->class->dev_uevent) { retval = dev->class->dev_uevent(dev, env); if (retval) pr_debug("device: '%s': %s: class uevent() " "returned %d\n", dev_name(dev), __func__, retval); } /* have the device type specific function add its stuff */ if (dev->type && dev->type->uevent) { retval = dev->type->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: dev_type uevent() " "returned %d\n", dev_name(dev), __func__, retval); } return retval; } static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, }; static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *top_kobj; struct kset *kset; struct kobj_uevent_env *env = NULL; int i; int len = 0; int retval; /* search the kset, the device belongs to */ top_kobj = &dev->kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) goto out; kset = top_kobj->kset; if (!kset->uevent_ops || !kset->uevent_ops->uevent) goto out; /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) if (!kset->uevent_ops->filter(kset, &dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); if (retval) goto out; /* copy keys to file */ for (i = 0; i < env->envp_idx; i++) len += sysfs_emit_at(buf, len, "%s\n", env->envp[i]); out: kfree(env); return len; } static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&dev->kobj, buf, count); if (rc) { dev_err(dev, "uevent: failed to send synthetic uevent\n"); return rc; } return count; } static DEVICE_ATTR_RW(uevent); static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); val = !dev->offline; device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool val; int ret; ret = strtobool(buf, &val); if (ret < 0) return ret; ret = lock_device_hotplug_sysfs(); if (ret) return ret; ret = val ? device_online(dev) : device_offline(dev); unlock_device_hotplug(); return ret < 0 ? ret : count; } static DEVICE_ATTR_RW(online); int device_add_groups(struct device *dev, const struct attribute_group **groups) { return sysfs_create_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_add_groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups) { sysfs_remove_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_remove_groups); union device_attr_group_devres { const struct attribute_group *group; const struct attribute_group **groups; }; static int devm_attr_group_match(struct device *dev, void *res, void *data) { return ((union device_attr_group_devres *)res)->group == data; } static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group *group = devres->group; dev_dbg(dev, "%s: removing group %p\n", __func__, group); sysfs_remove_group(&dev->kobj, group); } static void devm_attr_groups_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group **groups = devres->groups; dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); sysfs_remove_groups(&dev->kobj, groups); } /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. */ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_group_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_group(&dev->kobj, grp); if (error) { devres_free(devres); return error; } devres->group = grp; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_group); /** * devm_device_remove_group: remove a managed group from a device * @dev: device to remove the group from * @grp: group to remove * * This function removes a group of attributes from a device. The attributes * previously have to have been created for this group, otherwise it will fail. */ void devm_device_remove_group(struct device *dev, const struct attribute_group *grp) { WARN_ON(devres_release(dev, devm_attr_group_remove, devm_attr_group_match, /* cast away const */ (void *)grp)); } EXPORT_SYMBOL_GPL(devm_device_remove_group); /** * devm_device_add_groups - create a bunch of managed attribute groups * @dev: The device to create the group for * @groups: The attribute groups to create, NULL terminated * * This function creates a bunch of managed attribute groups. If an error * occurs when creating a group, all previously created groups will be * removed, unwinding everything back to the original state when this * function was called. It will explicitly warn and error if any of the * attribute files being created already exist. * * Returns 0 on success or error code from sysfs_create_group on failure. */ int devm_device_add_groups(struct device *dev, const struct attribute_group **groups) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_groups_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_groups(&dev->kobj, groups); if (error) { devres_free(devres); return error; } devres->groups = groups; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_groups); /** * devm_device_remove_groups - remove a list of managed groups * * @dev: The device for the groups to be removed from * @groups: NULL terminated list of groups to be removed * * If groups is not NULL, remove the specified groups from the device. */ void devm_device_remove_groups(struct device *dev, const struct attribute_group **groups) { WARN_ON(devres_release(dev, devm_attr_groups_remove, devm_attr_group_match, /* cast away const */ (void *)groups)); } EXPORT_SYMBOL_GPL(devm_device_remove_groups); static int device_add_attrs(struct device *dev) { struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { error = device_add_groups(dev, class->dev_groups); if (error) return error; } if (type) { error = device_add_groups(dev, type->groups); if (error) goto err_remove_class_groups; } error = device_add_groups(dev, dev->groups); if (error) goto err_remove_type_groups; if (device_supports_offline(dev) && !dev->offline_disabled) { error = device_create_file(dev, &dev_attr_online); if (error) goto err_remove_dev_groups; } if (fw_devlink_flags && !fw_devlink_is_permissive()) { error = device_create_file(dev, &dev_attr_waiting_for_supplier); if (error) goto err_remove_dev_online; } return 0; err_remove_dev_online: device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: if (type) device_remove_groups(dev, type->groups); err_remove_class_groups: if (class) device_remove_groups(dev, class->dev_groups); return error; } static void device_remove_attrs(struct device *dev) { struct class *class = dev->class; const struct device_type *type = dev->type; device_remove_file(dev, &dev_attr_waiting_for_supplier); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); if (type) device_remove_groups(dev, type->groups); if (class) device_remove_groups(dev, class->dev_groups); } static ssize_t dev_show(struct device *dev, struct device_attribute *attr, char *buf) { return print_dev_t(buf, dev->devt); } static DEVICE_ATTR_RO(dev); /* /sys/devices/ */ struct kset *devices_kset; /** * devices_kset_move_before - Move device in the devices_kset's list. * @deva: Device to move. * @devb: Device @deva should come before. */ static void devices_kset_move_before(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s before %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move_tail(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_after - Move device in the devices_kset's list. * @deva: Device to move * @devb: Device @deva should come after. */ static void devices_kset_move_after(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s after %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_last - move the device to the end of devices_kset's list. * @dev: device to move */ void devices_kset_move_last(struct device *dev) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); spin_lock(&devices_kset->list_lock); list_move_tail(&dev->kobj.entry, &devices_kset->list); spin_unlock(&devices_kset->list_lock); } /** * device_create_file - create sysfs attribute file for device. * @dev: device. * @attr: device attribute descriptor. */ int device_create_file(struct device *dev, const struct device_attribute *attr) { int error = 0; if (dev) { WARN(((attr->attr.mode & S_IWUGO) && !attr->store), "Attribute %s: write permission without 'store'\n", attr->attr.name); WARN(((attr->attr.mode & S_IRUGO) && !attr->show), "Attribute %s: read permission without 'show'\n", attr->attr.name); error = sysfs_create_file(&dev->kobj, &attr->attr); } return error; } EXPORT_SYMBOL_GPL(device_create_file); /** * device_remove_file - remove sysfs attribute file. * @dev: device. * @attr: device attribute descriptor. */ void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(device_remove_file); /** * device_remove_file_self - remove sysfs attribute file from its own method. * @dev: device. * @attr: device attribute descriptor. * * See kernfs_remove_self() for details. */ bool device_remove_file_self(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_remove_file_self(&dev->kobj, &attr->attr); else return false; } EXPORT_SYMBOL_GPL(device_remove_file_self); /** * device_create_bin_file - create sysfs binary attribute file for device. * @dev: device. * @attr: device binary attribute descriptor. */ int device_create_bin_file(struct device *dev, const struct bin_attribute *attr) { int error = -EINVAL; if (dev) error = sysfs_create_bin_file(&dev->kobj, attr); return error; } EXPORT_SYMBOL_GPL(device_create_bin_file); /** * device_remove_bin_file - remove sysfs binary attribute file * @dev: device. * @attr: device binary attribute descriptor. */ void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); } EXPORT_SYMBOL_GPL(device_remove_bin_file); static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; get_device(dev); } static void klist_children_put(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; put_device(dev); } /** * device_initialize - init device structure. * @dev: device. * * This prepares the device for use by other layers by initializing * its fields. * It is the first half of device_register(), if called by * that function, though it can also be called separately, so one * may use @dev's fields. In particular, get_device()/put_device() * may be used for reference counting of @dev after calling this * function. * * All fields in @dev must be initialized by the caller to 0, except * for those explicitly set to some other value. The simplest * approach is to use kzalloc() to allocate the structure containing * @dev. * * NOTE: Use put_device() to give up your reference instead of freeing * @dev directly once you have called this function. */ void device_initialize(struct device *dev) { dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); #ifdef CONFIG_PROVE_LOCKING mutex_init(&dev->lockdep_mutex); #endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); set_dev_node(dev, -1); #ifdef CONFIG_GENERIC_MSI_IRQ raw_spin_lock_init(&dev->msi_lock); INIT_LIST_HEAD(&dev->msi_list); #endif INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.needs_suppliers); INIT_LIST_HEAD(&dev->links.defer_hook); dev->links.status = DL_DEV_NO_DRIVER; } EXPORT_SYMBOL_GPL(device_initialize); struct kobject *virtual_device_parent(struct device *dev) { static struct kobject *virtual_dir = NULL; if (!virtual_dir) virtual_dir = kobject_create_and_add("virtual", &devices_kset->kobj); return virtual_dir; } struct class_dir { struct kobject kobj; struct class *class; }; #define to_class_dir(obj) container_of(obj, struct class_dir, kobj) static void class_dir_release(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); kfree(dir); } static const struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } static struct kobj_type class_dir_ktype = { .release = class_dir_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = class_dir_child_ns_type }; static struct kobject * class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) { struct class_dir *dir; int retval; dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return ERR_PTR(-ENOMEM); dir->class = class; kobject_init(&dir->kobj, &class_dir_ktype); dir->kobj.kset = &class->p->glue_dirs; retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); if (retval < 0) { kobject_put(&dir->kobj); return ERR_PTR(retval); } return &dir->kobj; } static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { if (dev->class) { struct kobject *kobj = NULL; struct kobject *parent_kobj; struct kobject *k; #ifdef CONFIG_BLOCK /* block disks show up in /sys/block */ if (sysfs_deprecated && dev->class == &block_class) { if (parent && parent->class == &block_class) return &parent->kobj; return &block_class.p->subsys.kobj; } #endif /* * If we have no parent, we live in "virtual". * Class-devices with a non class-device as parent, live * in a "glue" directory to prevent namespace collisions. */ if (parent == NULL) parent_kobj = virtual_device_parent(dev); else if (parent->class && !dev->class->ns_type) return &parent->kobj; else parent_kobj = &parent->kobj; mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ spin_lock(&dev->class->p->glue_dirs.list_lock); list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } spin_unlock(&dev->class->p->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); return kobj; } /* or create a new class-directory at the parent device */ k = class_dir_create_and_add(dev->class, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); return k; } /* subsystems can specify a default root directory for their devices */ if (!parent && dev->bus && dev->bus->dev_root) return &dev->bus->dev_root->kobj; if (parent) return &parent->kobj; return NULL; } static inline bool live_in_glue_dir(struct kobject *kobj, struct device *dev) { if (!kobj || !dev->class || kobj->kset != &dev->class->p->glue_dirs) return false; return true; } static inline struct kobject *get_glue_dir(struct device *dev) { return dev->kobj.parent; } /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the * global lock */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { unsigned int ref; /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); /** * There is a race condition between removing glue directory * and adding a new device under the glue directory. * * CPU1: CPU2: * * device_add() * get_device_parent() * class_dir_create_and_add() * kobject_add_internal() * create_dir() // create glue_dir * * device_add() * get_device_parent() * kobject_get() // get glue_dir * * device_del() * cleanup_glue_dir() * kobject_del(glue_dir) * * kobject_add() * kobject_add_internal() * create_dir() // in glue_dir * sysfs_create_dir_ns() * kernfs_create_dir_ns(sd) * * sysfs_remove_dir() // glue_dir->sd=NULL * sysfs_put() // free glue_dir->sd * * // sd is freed * kernfs_new_node(sd) * kernfs_get(glue_dir) * kernfs_add_one() * kernfs_put() * * Before CPU1 remove last child device under glue dir, if CPU2 add * a new device under glue dir, the glue_dir kobject reference count * will be increase to 2 in kobject_get(k). And CPU2 has been called * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() * and sysfs_put(). This result in glue_dir->sd is freed. * * Then the CPU2 will see a stale "empty" but still potentially used * glue dir around in kernfs_new_node(). * * In order to avoid this happening, we also should make sure that * kernfs_node for glue_dir is released in CPU1 only when refcount * for glue_dir kobj is 1. */ ref = kref_read(&glue_dir->kref); if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); int error; if (of_node) { error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); if (error) dev_warn(dev, "Error %d creating of_node link\n",error); /* An error here doesn't warrant bringing down the device */ } if (!dev->class) return 0; error = sysfs_create_link(&dev->kobj, &dev->class->p->subsys.kobj, "subsystem"); if (error) goto out_devnode; if (dev->parent && device_is_not_partition(dev)) { error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); if (error) goto out_subsys; } #ifdef CONFIG_BLOCK /* /sys/block has directories and does not need symlinks */ if (sysfs_deprecated && dev->class == &block_class) return 0; #endif /* link in the class directory pointing to the device */ error = sysfs_create_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; return 0; out_device: sysfs_remove_link(&dev->kobj, "device"); out_subsys: sysfs_remove_link(&dev->kobj, "subsystem"); out_devnode: sysfs_remove_link(&dev->kobj, "of_node"); return error; } static void device_remove_class_symlinks(struct device *dev) { if (dev_of_node(dev)) sysfs_remove_link(&dev->kobj, "of_node"); if (!dev->class) return; if (dev->parent && device_is_not_partition(dev)) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(&dev->kobj, "subsystem"); #ifdef CONFIG_BLOCK if (sysfs_deprecated && dev->class == &block_class) return; #endif sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); } /** * dev_set_name - set a device name * @dev: device * @fmt: format string for the device's name */ int dev_set_name(struct device *dev, const char *fmt, ...) { va_list vargs; int err; va_start(vargs, fmt); err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_set_name); /** * device_to_dev_kobj - select a /sys/dev/ directory for the device * @dev: device * * By default we select char/ for new entries. Setting class->dev_obj * to NULL prevents an entry from being created. class->dev_kobj must * be set (or cleared) before any devices are registered to the class * otherwise device_create_sys_dev_entry() and * device_remove_sys_dev_entry() will disagree about the presence of * the link. */ static struct kobject *device_to_dev_kobj(struct device *dev) { struct kobject *kobj; if (dev->class) kobj = dev->class->dev_kobj; else kobj = sysfs_dev_char_kobj; return kobj; } static int device_create_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); int error = 0; char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); error = sysfs_create_link(kobj, &dev->kobj, devt_str); } return error; } static void device_remove_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); sysfs_remove_link(kobj, devt_str); } } static int device_private_init(struct device *dev) { dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); if (!dev->p) return -ENOMEM; dev->p->device = dev; klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); return 0; } /** * device_add - add device to device hierarchy. * @dev: device. * * This is part 2 of device_register(), though may be called * separately _iff_ device_initialize() has been called separately. * * This adds @dev to the kobject hierarchy via kobject_add(), adds it * to the global and sibling lists for the device, then * adds it to the other relevant subsystems of the driver model. * * Do not call this routine or device_register() more than once for * any device structure. The driver model core is not designed to work * with devices that get unregistered and then spring back to life. * (Among other things, it's very hard to guarantee that all references * to the previous incarnation of @dev have been dropped.) Allocate * and register a fresh new struct device instead. * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up your * reference instead. * * Rule of thumb is: if device_add() succeeds, you should call * device_del() when you want to get rid of it. If device_add() has * *not* succeeded, use *only* put_device() to drop the reference * count. */ int device_add(struct device *dev) { struct device *parent; struct kobject *kobj; struct class_interface *class_intf; int error = -EINVAL; struct kobject *glue_dir = NULL; dev = get_device(dev); if (!dev) goto done; if (!dev->p) { error = device_private_init(dev); if (error) goto done; } /* * for statically allocated devices, which should all be converted * some day, we need to initialize the name. We prevent reading back * the name, and force the use of dev_name() */ if (dev->init_name) { dev_set_name(dev, "%s", dev->init_name); dev->init_name = NULL; } /* subsystems can specify simple device enumeration */ if (!dev_name(dev) && dev->bus && dev->bus->dev_name) dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); if (!dev_name(dev)) { error = -EINVAL; goto name_error; } pr_debug("device: '%s': %s\n", dev_name(dev), __func__); parent = get_device(dev->parent); kobj = get_device_parent(dev, parent); if (IS_ERR(kobj)) { error = PTR_ERR(kobj); goto parent_error; } if (kobj) dev->kobj.parent = kobj; /* use parent numa_node */ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) set_dev_node(dev, dev_to_node(parent)); /* first, register with generic layer. */ /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); if (error) { glue_dir = get_glue_dir(dev); goto Error; } /* notify platform of device entry */ error = device_platform_notify(dev, KOBJ_ADD); if (error) goto platform_error; error = device_create_file(dev, &dev_attr_uevent); if (error) goto attrError; error = device_add_class_symlinks(dev); if (error) goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; error = bus_add_device(dev); if (error) goto BusError; error = dpm_sysfs_add(dev); if (error) goto DPMError; device_pm_add(dev); if (MAJOR(dev->devt)) { error = device_create_file(dev, &dev_attr_dev); if (error) goto DevAttrError; error = device_create_sys_dev_entry(dev); if (error) goto SysEntryError; devtmpfs_create_node(dev); } /* Notify clients of device addition. This call must come * after dpm_sysfs_add() and before kobject_uevent(). */ if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_ADD_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_ADD); /* * Check if any of the other devices (consumers) have been waiting for * this device (supplier) to be added so that they can create a device * link to it. * * This needs to happen after device_pm_add() because device_link_add() * requires the supplier be registered before it's called. * * But this also needs to happen before bus_probe_device() to make sure * waiting consumers can link to it before the driver is bound to the * device and the driver sync_state callback is called for this device. */ if (dev->fwnode && !dev->fwnode->dev) { dev->fwnode->dev = dev; fw_devlink_link_device(dev); } bus_probe_device(dev); if (parent) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); if (dev->class) { mutex_lock(&dev->class->p->mutex); /* tie the class to the device */ klist_add_tail(&dev->p->knode_class, &dev->class->p->klist_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, &dev->class->p->interfaces, node) if (class_intf->add_dev) class_intf->add_dev(dev, class_intf); mutex_unlock(&dev->class->p->mutex); } done: put_device(dev); return error; SysEntryError: if (MAJOR(dev->devt)) device_remove_file(dev, &dev_attr_dev); DevAttrError: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: bus_remove_device(dev); BusError: device_remove_attrs(dev); AttrsError: device_remove_class_symlinks(dev); SymlinkError: device_remove_file(dev, &dev_attr_uevent); attrError: device_platform_notify(dev, KOBJ_REMOVE); platform_error: kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); Error: cleanup_glue_dir(dev, glue_dir); parent_error: put_device(parent); name_error: kfree(dev->p); dev->p = NULL; goto done; } EXPORT_SYMBOL_GPL(device_add); /** * device_register - register a device with the system. * @dev: pointer to the device structure * * This happens in two clean steps - initialize the device * and add it to the system. The two steps can be called * separately, but this is the easiest and most common. * I.e. you should only call the two helpers separately if * have a clearly defined need to use and refcount the device * before it is added to the hierarchy. * * For more information, see the kerneldoc for device_initialize() * and device_add(). * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up the * reference initialized in this function instead. */ int device_register(struct device *dev) { device_initialize(dev); return device_add(dev); } EXPORT_SYMBOL_GPL(device_register); /** * get_device - increment reference count for device. * @dev: device. * * This simply forwards the call to kobject_get(), though * we do take care to provide for the case that we get a NULL * pointer passed in. */ struct device *get_device(struct device *dev) { return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; } EXPORT_SYMBOL_GPL(get_device); /** * put_device - decrement reference count. * @dev: device in question. */ void put_device(struct device *dev) { /* might_sleep(); */ if (dev) kobject_put(&dev->kobj); } EXPORT_SYMBOL_GPL(put_device); bool kill_device(struct device *dev) { /* * Require the device lock and set the "dead" flag to guarantee that * the update behavior is consistent with the other bitfields near * it and that we cannot have an asynchronous probe routine trying * to run while we are tearing out the bus/class/sysfs from * underneath the device. */ lockdep_assert_held(&dev->mutex); if (dev->p->dead) return false; dev->p->dead = true; return true; } EXPORT_SYMBOL_GPL(kill_device); /** * device_del - delete device from system. * @dev: device. * * This is the first part of the device unregistration * sequence. This removes the device from the lists we control * from here, has it removed from the other driver model * subsystems it was added to in device_add(), and removes it * from the kobject hierarchy. * * NOTE: this should be called manually _iff_ device_add() was * also called manually. */ void device_del(struct device *dev) { struct device *parent = dev->parent; struct kobject *glue_dir = NULL; struct class_interface *class_intf; unsigned int noio_flag; device_lock(dev); kill_device(dev); device_unlock(dev); if (dev->fwnode && dev->fwnode->dev == dev) dev->fwnode->dev = NULL; /* Notify clients of device removal. This call must come * before dpm_sysfs_remove(). */ noio_flag = memalloc_noio_save(); if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DEL_DEVICE, dev); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); if (MAJOR(dev->devt)) { devtmpfs_delete_node(dev); device_remove_sys_dev_entry(dev); device_remove_file(dev, &dev_attr_dev); } if (dev->class) { device_remove_class_symlinks(dev); mutex_lock(&dev->class->p->mutex); /* notify any interfaces that the device is now gone */ list_for_each_entry(class_intf, &dev->class->p->interfaces, node) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ klist_del(&dev->p->knode_class); mutex_unlock(&dev->class->p->mutex); } device_remove_file(dev, &dev_attr_uevent); device_remove_attrs(dev); bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); device_platform_notify(dev, KOBJ_REMOVE); device_remove_properties(dev); device_links_purge(dev); if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_REMOVED_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); cleanup_glue_dir(dev, glue_dir); memalloc_noio_restore(noio_flag); put_device(parent); } EXPORT_SYMBOL_GPL(device_del); /** * device_unregister - unregister device from system. * @dev: device going away. * * We do this in two parts, like we do device_register(). First, * we remove it from all the subsystems with device_del(), then * we decrement the reference count via put_device(). If that * is the final reference count, the device will be cleaned up * via device_release() above. Otherwise, the structure will * stick around until the final reference to the device is dropped. */ void device_unregister(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); device_del(dev); put_device(dev); } EXPORT_SYMBOL_GPL(device_unregister); static struct device *prev_device(struct klist_iter *i) { struct klist_node *n = klist_prev(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } /** * device_get_devnode - path of device node file * @dev: device * @mode: returned file access mode * @uid: returned file owner * @gid: returned file group * @tmp: possibly allocated string * * Return the relative path of a possible device node. * Non-default names may need to allocate a memory to compose * a name. This memory is returned in tmp and needs to be * freed by the caller. */ const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp) { char *s; *tmp = NULL; /* the device type may provide a specific name */ if (dev->type && dev->type->devnode) *tmp = dev->type->devnode(dev, mode, uid, gid); if (*tmp) return *tmp; /* the class may provide a specific name */ if (dev->class && dev->class->devnode) *tmp = dev->class->devnode(dev, mode); if (*tmp) return *tmp; /* return name without allocation, tmp == NULL */ if (strchr(dev_name(dev), '!') == NULL) return dev_name(dev); /* replace '!' in the name with '/' */ s = kstrdup(dev_name(dev), GFP_KERNEL); if (!s) return NULL; strreplace(s, '!', '/'); return *tmp = s; } /** * device_for_each_child - device child iterator. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while (!error && (child = next_device(&i))) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while ((child = prev_device(&i)) && !error) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child_reverse); /** * device_find_child - device iterator for locating a particular device. * @parent: parent struct device * @match: Callback function to check device * @data: Data to pass to match function * * This is similar to the device_for_each_child() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero and a reference to the * current device can be obtained, this function will return to the caller * and not iterate over any more devices. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child(struct device *parent, void *data, int (*match)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (match(child, data) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); /** * device_find_child_by_name - device iterator for locating a child device. * @parent: parent struct device * @name: name of the child device * * This is similar to the device_find_child() function above, but it * returns a reference to a device that has the name @name. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child_by_name(struct device *parent, const char *name) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (sysfs_streq(dev_name(child), name) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child_by_name); int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); if (!devices_kset) return -ENOMEM; dev_kobj = kobject_create_and_add("dev", NULL); if (!dev_kobj) goto dev_kobj_err; sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); if (!sysfs_dev_block_kobj) goto block_kobj_err; sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; return 0; char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: kobject_put(dev_kobj); dev_kobj_err: kset_unregister(devices_kset); return -ENOMEM; } static int device_check_offline(struct device *dev, void *not_used) { int ret; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; } /** * device_offline - Prepare the device for hot-removal. * @dev: Device to be put offline. * * Execute the device bus type's .offline() callback, if present, to prepare * the device for a subsequent hot-removal. If that succeeds, the device must * not be used until either it is removed or its bus type's .online() callback * is executed. * * Call under device_hotplug_lock. */ int device_offline(struct device *dev) { int ret; if (dev->offline_disabled) return -EPERM; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = 1; } else { ret = dev->bus->offline(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_OFFLINE); dev->offline = true; } } } device_unlock(dev); return ret; } /** * device_online - Put the device back online after successful device_offline(). * @dev: Device to be put back online. * * If device_offline() has been successfully executed for @dev, but the device * has not been removed subsequently, execute its bus type's .online() callback * to indicate that the device can be used again. * * Call under device_hotplug_lock. */ int device_online(struct device *dev) { int ret = 0; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = dev->bus->online(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_ONLINE); dev->offline = false; } } else { ret = 1; } } device_unlock(dev); return ret; } struct root_device { struct device dev; struct module *owner; }; static inline struct root_device *to_root_device(struct device *d) { return container_of(d, struct root_device, dev); } static void root_device_release(struct device *dev) { kfree(to_root_device(dev)); } /** * __root_device_register - allocate and register a root device * @name: root device name * @owner: owner module of the root device, usually THIS_MODULE * * This function allocates a root device and registers it * using device_register(). In order to free the returned * device, use root_device_unregister(). * * Root devices are dummy devices which allow other devices * to be grouped under /sys/devices. Use this function to * allocate a root device and then use it as the parent of * any device which should appear under /sys/devices/{name} * * The /sys/devices/{name} directory will also contain a * 'module' symlink which points to the @owner directory * in sysfs. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: You probably want to use root_device_register(). */ struct device *__root_device_register(const char *name, struct module *owner) { struct root_device *root; int err = -ENOMEM; root = kzalloc(sizeof(struct root_device), GFP_KERNEL); if (!root) return ERR_PTR(err); err = dev_set_name(&root->dev, "%s", name); if (err) { kfree(root); return ERR_PTR(err); } root->dev.release = root_device_release; err = device_register(&root->dev); if (err) { put_device(&root->dev); return ERR_PTR(err); } #ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ if (owner) { struct module_kobject *mk = &owner->mkobj; err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); if (err) { device_unregister(&root->dev); return ERR_PTR(err); } root->owner = owner; } #endif return &root->dev; } EXPORT_SYMBOL_GPL(__root_device_register); /** * root_device_unregister - unregister and free a root device * @dev: device going away * * This function unregisters and cleans up a device that was created by * root_device_register(). */ void root_device_unregister(struct device *dev) { struct root_device *root = to_root_device(dev); if (root->owner) sysfs_remove_link(&root->dev.kobj, "module"); device_unregister(dev); } EXPORT_SYMBOL_GPL(root_device_unregister); static void device_create_release(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); kfree(dev); } static __printf(6, 0) struct device * device_create_groups_vargs(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (class == NULL || IS_ERR(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * device_create - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: the struct class passed to this function must have previously * been created with a call to class_create(). */ struct device *device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create); /** * device_create_with_groups - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @groups: NULL-terminated list of attribute groups to be created * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * Additional attributes specified in the groups parameter will also * be created automatically. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: the struct class passed to this function must have previously * been created with a call to class_create(). */ struct device *device_create_with_groups(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create_with_groups); /** * device_destroy - removes a device that was created with device_create() * @class: pointer to the struct class that this device was registered with * @devt: the dev_t of the device that was previously registered * * This call unregisters and cleans up a device that was created with a * call to device_create(). */ void device_destroy(struct class *class, dev_t devt) { struct device *dev; dev = class_find_device_by_devt(class, devt); if (dev) { put_device(dev); device_unregister(dev); } } EXPORT_SYMBOL_GPL(device_destroy); /** * device_rename - renames a device * @dev: the pointer to the struct device to be renamed * @new_name: the new name of the device * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of device_rename * on the same device to ensure that new_name is valid and * won't conflict with other devices. * * Note: Don't call this function. Currently, the networking layer calls this * function, but that will change. The following text from Kay Sievers offers * some insight: * * Renaming devices is racy at many levels, symlinks and other stuff are not * replaced atomically, and you get a "move" uevent, but it's not easy to * connect the event to the old and new device. Device nodes are not renamed at * all, there isn't even support for that in the kernel now. * * In the meantime, during renaming, your target name might be taken by another * driver, creating conflicts. Or the old name is taken directly after you * renamed it -- then you get events for the same DEVPATH, before you even see * the "move" event. It's just a mess, and nothing new should ever rely on * kernel device renaming. Besides that, it's not even implemented now for * other things than (driver-core wise very simple) network devices. * * We are currently about to change network renaming in udev to completely * disallow renaming of devices in the same namespace as the kernel uses, * because we can't solve the problems properly, that arise with swapping names * of multiple interfaces without races. Means, renaming of eth[0-9]* will only * be allowed to some other name than eth[0-9]*, for the aforementioned * reasons. * * Make up a "real" name in the driver before you register anything, or add * some other attributes for userspace to find the device, or use udev to add * symlinks -- but never rename kernel devices later, it's a complete mess. We * don't even want to get into that and try to implement the missing pieces in * the core. We really have other pieces to fix in the driver core mess. :) */ int device_rename(struct device *dev, const char *new_name) { struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; dev = get_device(dev); if (!dev) return -EINVAL; dev_dbg(dev, "renaming to %s\n", new_name); old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!old_device_name) { error = -ENOMEM; goto out; } if (dev->class) { error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); if (error) goto out; } error = kobject_rename(kobj, new_name); if (error) goto out; out: put_device(dev); kfree(old_device_name); return error; } EXPORT_SYMBOL_GPL(device_rename); static int device_move_class_links(struct device *dev, struct device *old_parent, struct device *new_parent) { int error = 0; if (old_parent) sysfs_remove_link(&dev->kobj, "device"); if (new_parent) error = sysfs_create_link(&dev->kobj, &new_parent->kobj, "device"); return error; } /** * device_move - moves a device to a new parent * @dev: the pointer to the struct device to be moved * @new_parent: the new parent of the device (can be NULL) * @dpm_order: how to reorder the dpm_list */ int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order) { int error; struct device *old_parent; struct kobject *new_parent_kobj; dev = get_device(dev); if (!dev) return -EINVAL; device_pm_lock(); new_parent = get_device(new_parent); new_parent_kobj = get_device_parent(dev, new_parent); if (IS_ERR(new_parent_kobj)) { error = PTR_ERR(new_parent_kobj); put_device(new_parent); goto out; } pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), __func__, new_parent ? dev_name(new_parent) : "<NULL>"); error = kobject_move(&dev->kobj, new_parent_kobj); if (error) { cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } old_parent = dev->parent; dev->parent = new_parent; if (old_parent) klist_remove(&dev->p->knode_parent); if (new_parent) { klist_add_tail(&dev->p->knode_parent, &new_parent->p->klist_children); set_dev_node(dev, dev_to_node(new_parent)); } if (dev->class) { error = device_move_class_links(dev, old_parent, new_parent); if (error) { /* We ignore errors on cleanup since we're hosed anyway... */ device_move_class_links(dev, new_parent, old_parent); if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->p->knode_parent); dev->parent = old_parent; if (old_parent) { klist_add_tail(&dev->p->knode_parent, &old_parent->p->klist_children); set_dev_node(dev, dev_to_node(old_parent)); } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } } switch (dpm_order) { case DPM_ORDER_NONE: break; case DPM_ORDER_DEV_AFTER_PARENT: device_pm_move_after(dev, new_parent); devices_kset_move_after(dev, new_parent); break; case DPM_ORDER_PARENT_BEFORE_DEV: device_pm_move_before(new_parent, dev); devices_kset_move_before(new_parent, dev); break; case DPM_ORDER_DEV_LAST: device_pm_move_last(dev); devices_kset_move_last(dev); break; } put_device(old_parent); out: device_pm_unlock(); put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_move); static int device_attrs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { struct kobject *kobj = &dev->kobj; struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { /* * Change the device groups of the device class for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, class->dev_groups, kuid, kgid); if (error) return error; } if (type) { /* * Change the device groups of the device type for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, type->groups, kuid, kgid); if (error) return error; } /* Change the device groups of @dev to @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, dev->groups, kuid, kgid); if (error) return error; if (device_supports_offline(dev) && !dev->offline_disabled) { /* Change online device attributes of @dev to @kuid/@kgid. */ error = sysfs_file_change_owner(kobj, dev_attr_online.attr.name, kuid, kgid); if (error) return error; } return 0; } /** * device_change_owner - change the owner of an existing device. * @dev: device. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This changes the owner of @dev and its corresponding sysfs entries to * @kuid/@kgid. This function closely mirrors how @dev was added via driver * core. * * Returns 0 on success or error code on failure. */ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int error; struct kobject *kobj = &dev->kobj; dev = get_device(dev); if (!dev) return -EINVAL; /* * Change the kobject and the default attributes and groups of the * ktype associated with it to @kuid/@kgid. */ error = sysfs_change_owner(kobj, kuid, kgid); if (error) goto out; /* * Change the uevent file for @dev to the new owner. The uevent file * was created in a separate step when @dev got added and we mirror * that step here. */ error = sysfs_file_change_owner(kobj, dev_attr_uevent.attr.name, kuid, kgid); if (error) goto out; /* * Change the device groups, the device groups associated with the * device class, and the groups associated with the device type of @dev * to @kuid/@kgid. */ error = device_attrs_change_owner(dev, kuid, kgid); if (error) goto out; error = dpm_sysfs_change_owner(dev, kuid, kgid); if (error) goto out; #ifdef CONFIG_BLOCK if (sysfs_deprecated && dev->class == &block_class) goto out; #endif /* * Change the owner of the symlink located in the class directory of * the device class associated with @dev which points to the actual * directory entry for @dev to @kuid/@kgid. This ensures that the * symlink shows the same permissions as its target. */ error = sysfs_link_change_owner(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev), kuid, kgid); if (error) goto out; out: put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_change_owner); /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; wait_for_device_probe(); device_block_probing(); cpufreq_suspend(); spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. * Beware that device unplug events may also start pulling * devices offline, even as the system is shutting down. */ while (!list_empty(&devices_kset->list)) { dev = list_entry(devices_kset->list.prev, struct device, kobj.entry); /* * hold reference count of device's parent to * prevent it from being freed because parent's * lock is to be held */ parent = get_device(dev->parent); get_device(dev); /* * Make sure the device is off the kset list, in the * event that dev->*->shutdown() doesn't remove it. */ list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ if (parent) device_lock(parent); device_lock(dev); /* Don't allow any more runtime suspends */ pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); if (dev->class && dev->class->shutdown_pre) { if (initcall_debug) dev_info(dev, "shutdown_pre\n"); dev->class->shutdown_pre(dev); } if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); } else if (dev->driver && dev->driver->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->driver->shutdown(dev); } device_unlock(dev); if (parent) device_unlock(parent); put_device(dev); put_device(parent); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); } /* * Device logging functions */ #ifdef CONFIG_PRINTK static void set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) { const char *subsys; memset(dev_info, 0, sizeof(*dev_info)); if (dev->class) subsys = dev->class->name; else if (dev->bus) subsys = dev->bus->name; else return; strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); /* * Add device identifier DEVICE=: * b12:8 block dev_t * c127:3 char dev_t * n8 netdev ifindex * +sound:card0 subsystem:devname */ if (MAJOR(dev->devt)) { char c; if (strcmp(subsys, "block") == 0) c = 'b'; else c = 'c'; snprintf(dev_info->device, sizeof(dev_info->device), "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); } else if (strcmp(subsys, "net") == 0) { struct net_device *net = to_net_dev(dev); snprintf(dev_info->device, sizeof(dev_info->device), "n%u", net->ifindex); } else { snprintf(dev_info->device, sizeof(dev_info->device), "+%s:%s", subsys, dev_name(dev)); } } int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) { struct dev_printk_info dev_info; set_dev_info(dev, &dev_info); return vprintk_emit(0, level, &dev_info, fmt, args); } EXPORT_SYMBOL(dev_vprintk_emit); int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = dev_vprintk_emit(level, dev, fmt, args); va_end(args); return r; } EXPORT_SYMBOL(dev_printk_emit); static void __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) { if (dev) dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", dev_driver_string(dev), dev_name(dev), vaf); else printk("%s(NULL device *): %pV", level, vaf); } void dev_printk(const char *level, const struct device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; __dev_printk(level, dev, &vaf); va_end(args); } EXPORT_SYMBOL(dev_printk); #define define_dev_printk_level(func, kern_level) \ void func(const struct device *dev, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ __dev_printk(kern_level, dev, &vaf); \ \ va_end(args); \ } \ EXPORT_SYMBOL(func); define_dev_printk_level(_dev_emerg, KERN_EMERG); define_dev_printk_level(_dev_alert, KERN_ALERT); define_dev_printk_level(_dev_crit, KERN_CRIT); define_dev_printk_level(_dev_err, KERN_ERR); define_dev_printk_level(_dev_warn, KERN_WARNING); define_dev_printk_level(_dev_notice, KERN_NOTICE); define_dev_printk_level(_dev_info, KERN_INFO); #endif /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device * @err: error value to test * @fmt: printf-style format string * @...: arguments as specified in the format string * * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. * It replaces code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; * * with:: * * return dev_err_probe(dev, err, ...); * * Returns @err. * */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (err != -EPROBE_DEFER) { dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } else { device_set_deferred_probe_reason(dev, &vaf); dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } va_end(args); return err; } EXPORT_SYMBOL_GPL(dev_err_probe); static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); } /** * set_primary_fwnode - Change the primary firmware node of a given device. * @dev: Device to handle. * @fwnode: New primary firmware node of the device. * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { if (fwnode_is_primary(fn)) fn = fn->secondary; if (fn) { WARN_ON(fwnode->secondary); fwnode->secondary = fn; } dev->fwnode = fwnode; } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) fn->secondary = NULL; } else { dev->fwnode = NULL; } } } EXPORT_SYMBOL_GPL(set_primary_fwnode); /** * set_secondary_fwnode - Change the secondary firmware node of a given device. * @dev: Device to handle. * @fwnode: New secondary firmware node of the device. * * If a primary firmware node of the device is present, set its secondary * pointer to @fwnode. Otherwise, set the device's firmware node pointer to * @fwnode. */ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { if (fwnode) fwnode->secondary = ERR_PTR(-ENODEV); if (fwnode_is_primary(dev->fwnode)) dev->fwnode->secondary = fwnode; else dev->fwnode = fwnode; } EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused * * Takes another reference to the new device-tree node after first dropping * any reference held to the old node. */ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) { of_node_put(dev->of_node); dev->of_node = of_node_get(dev2->of_node); dev->of_node_reused = true; } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); int device_match_of_node(struct device *dev, const void *np) { return dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { return dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); int device_match_devt(struct device *dev, const void *pdevt) { return dev->devt == *(dev_t *)pdevt; } EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { return ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_any(struct device *dev, const void *unused) { return 1; } EXPORT_SYMBOL_GPL(device_match_any);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 #ifndef _LINUX_PSI_H #define _LINUX_PSI_H #include <linux/jump_label.h> #include <linux/psi_types.h> #include <linux/sched.h> #include <linux/poll.h> struct seq_file; struct css_set; #ifdef CONFIG_PSI extern struct static_key_false psi_disabled; extern struct psi_group psi_system; void psi_init(void); void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); #ifdef CONFIG_CGROUPS int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); #endif #else /* CONFIG_PSI */ static inline void psi_init(void) {} static inline void psi_memstall_enter(unsigned long *flags) {} static inline void psi_memstall_leave(unsigned long *flags) {} #ifdef CONFIG_CGROUPS static inline int psi_cgroup_alloc(struct cgroup *cgrp) { return 0; } static inline void psi_cgroup_free(struct cgroup *cgrp) { } static inline void cgroup_move_task(struct task_struct *p, struct css_set *to) { rcu_assign_pointer(p->cgroups, to); } #endif #endif /* CONFIG_PSI */ #endif /* _LINUX_PSI_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SEQ_FILE_NET_H__ #define __SEQ_FILE_NET_H__ #include <linux/seq_file.h> struct net; extern struct net init_net; struct seq_net_private { #ifdef CONFIG_NET_NS struct net *net; #endif }; static inline struct net *seq_file_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS return ((struct seq_net_private *)seq->private)->net; #else return &init_net; #endif } /* * This one is needed for proc_create_net_single since net is stored directly * in private not as a struct i.e. seq_file_net can't be used. */ static inline struct net *seq_file_single_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS return (struct net *)seq->private; #else return &init_net; #endif } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Asymmetric public-key cryptography key subtype * * See Documentation/crypto/asymmetric-keys.rst * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_ASYMMETRIC_SUBTYPE_H #define _KEYS_ASYMMETRIC_SUBTYPE_H #include <linux/seq_file.h> #include <keys/asymmetric-type.h> struct kernel_pkey_query; struct kernel_pkey_params; struct public_key_signature; /* * Keys of this type declare a subtype that indicates the handlers and * capabilities. */ struct asymmetric_key_subtype { struct module *owner; const char *name; unsigned short name_len; /* length of name */ /* Describe a key of this subtype for /proc/keys */ void (*describe)(const struct key *key, struct seq_file *m); /* Destroy a key of this subtype */ void (*destroy)(void *payload_crypto, void *payload_auth); int (*query)(const struct kernel_pkey_params *params, struct kernel_pkey_query *info); /* Encrypt/decrypt/sign data */ int (*eds_op)(struct kernel_pkey_params *params, const void *in, void *out); /* Verify the signature on a key of this subtype (optional) */ int (*verify_signature)(const struct key *key, const struct public_key_signature *sig); }; /** * asymmetric_key_subtype - Get the subtype from an asymmetric key * @key: The key of interest. * * Retrieves and returns the subtype pointer of the asymmetric key from the * type-specific data attached to the key. */ static inline struct asymmetric_key_subtype *asymmetric_key_subtype(const struct key *key) { return key->payload.data[asym_subtype]; } #endif /* _KEYS_ASYMMETRIC_SUBTYPE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_H #define _IPV6_H #include <uapi/linux/ipv6.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) /* * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { __s32 forwarding; __s32 hop_limit; __s32 mtu6; __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; __s32 mldv2_unsolicited_report_interval; __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE __s32 mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; #ifdef CONFIG_IPV6_SEG6_HMAC __s32 seg6_require_hmac; #endif __u32 enhanced_dad; __u32 addr_gen_mode; __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; struct ctl_table_header *sysctl_header; }; struct ipv6_params { __s32 disable_ipv6; __s32 autoconf; }; extern struct ipv6_params ipv6_defaults; #include <linux/tcp.h> #include <linux/udp.h> #include <net/inet_sock.h> static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_network_header(skb); } static inline struct ipv6hdr *inner_ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_inner_network_header(skb); } static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_transport_header(skb); } static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) { return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) - skb_network_header_len(skb); } /* This structure contains results of exthdrs parsing as offsets from skb->nh. */ struct inet6_skb_parm { int iif; __be16 ra; __u16 dst0; __u16 srcrt; __u16 dst1; __u16 lastopt; __u16 nhoff; __u16 flags; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif __u16 frag_max_size; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 }; #if defined(CONFIG_NET_L3_MASTER_DEV) static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } #endif #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) #define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb)) static inline int inet6_iif(const struct sk_buff *skb) { bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } static inline bool inet6_is_jumbogram(const struct sk_buff *skb) { return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); } /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline int inet6_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return IP6CB(skb)->iif; #endif return 0; } struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; }; /** * struct ipv6_pinfo - ipv6 private area * * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc) * this _must_ be the last member, so that inet6_sk_generic * is able to calculate its offset from the base struct sock * by using the struct proto->slab_obj_size member. -acme */ struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES const struct in6_addr *saddr_cache; #endif __be32 flow_label; __u32 frag_size; /* * Packed in 16bits. * Omit one shift by putting the signed field at MSB. */ #if defined(__BIG_ENDIAN_BITFIELD) __s16 hop_limit:9; __u16 __unused_1:7; #else __u16 __unused_1:7; __s16 hop_limit:9; #endif #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ __s16 mcast_hops:9; __u16 __unused_2:6, mc_loop:1; #else __u16 mc_loop:1, __unused_2:6; __s16 mcast_hops:9; #endif int ucast_oif; int mcast_oif; /* pktoption flags */ union { struct { __u16 srcrt:1, osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, rxohlim:1, hopopts:1, ohopopts:1, dstopts:1, odstopts:1, rxflow:1, rxtclass:1, rxpmtu:1, rxorigdstaddr:1, recvfragsize:1; /* 1 bits hole */ } bits; __u16 all; } rxopt; /* sockopt flags */ __u16 recverr:1, sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ dontfrag:1, autoflowlabel:1, autoflowlabel_set:1, mc_all:1, recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; __u32 dst_cookie; __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ struct inet_sock inet; __u32 checksum; /* perform checksum */ __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; extern int inet6_sk_rebuild_header(struct sock *sk); struct tcp6_timewait_sock { struct tcp_timewait_sock tcp6tw_tcp; }; #if IS_ENABLED(CONFIG_IPV6) bool ipv6_mod_enabled(void); static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; } #define __ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { if (sk->sk_family == AF_INET6) return &sk->sk_v6_rcv_saddr; return NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) { /* ipv6only field is at same position for timewait and other sockets */ return ipv6_only_sock(sk); } #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 static inline bool ipv6_mod_enabled(void) { return false; } static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return NULL; } static inline struct inet6_request_sock * inet6_rsk(const struct request_sock *rsk) { return NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tcp #if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TCP_H #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/tracepoint.h> #include <net/ipv6.h> #include <net/tcp.h> #include <linux/sock_diag.h> #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ do { \ struct in6_addr *pin6; \ \ pin6 = (struct in6_addr *)__entry->saddr_v6; \ ipv6_addr_set_v4mapped(saddr, pin6); \ pin6 = (struct in6_addr *)__entry->daddr_v6; \ ipv6_addr_set_v4mapped(daddr, pin6); \ } while (0) #if IS_ENABLED(CONFIG_IPV6) #define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ do { \ if (sk->sk_family == AF_INET6) { \ struct in6_addr *pin6; \ \ pin6 = (struct in6_addr *)__entry->saddr_v6; \ *pin6 = saddr6; \ pin6 = (struct in6_addr *)__entry->daddr_v6; \ *pin6 = daddr6; \ } else { \ TP_STORE_V4MAPPED(__entry, saddr, daddr); \ } \ } while (0) #else #define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ TP_STORE_V4MAPPED(__entry, saddr, daddr) #endif /* * tcp event with arguments sk and skb * * Note: this class requires a valid sk pointer; while skb pointer could * be NULL. */ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(__u16, sport) __field(__u16, dport) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); /* * tcp event with arguments sk * * Note: this class requires a valid sk pointer. */ DECLARE_EVENT_CLASS(tcp_event_sk, TP_PROTO(struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), TP_ARGS(sk, req), TP_STRUCT__entry( __field(const void *, skaddr) __field(const void *, req) __field(__u16, sport) __field(__u16, dport) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); __be32 *p32; __entry->skaddr = sk; __entry->req = req; __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, TP_PROTO(struct sock *sk, struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) __field(__u32, ssthresh) __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tp->snd_cwnd; __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) ); #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H struct route_info { __u8 type; __u8 length; __u8 prefix_len; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved_h:3, route_pref:2, reserved_l:3; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved_l:3, route_pref:2, reserved_h:3; #endif __be32 lifetime; __u8 prefix[]; /* 0,8 or 16 */ }; #include <net/addrconf.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> #include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> #include <net/nexthop.h> #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 #define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 #define RT6_LOOKUP_F_DST_NOREF 0x00000080 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header */ #define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values * IPV6_PREFER_SRC_TMP = 0x1 * IPV6_PREFER_SRC_PUBLIC = 0x2 * IPV6_PREFER_SRC_COA = 0x4 * and above RT6_LOOKUP_F_SRCPREF_xxx flags. */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { /* No need to bitmask because srcprefs have only 3 bits. */ return srcprefs << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { return (flags >> 3) & 7; } static inline bool rt6_need_strict(const struct in6_addr *daddr) { return ipv6_addr_type(daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } /* fib entries using a nexthop object can not be coalesced into * a multipath route */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && f6i->fib6_nh->fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags_noref(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); static inline struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { return ip6_route_output_flags(net, sk, fl6, 0); } /* Only conditionally release dst if flags indicates * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list. */ static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) { if (!(flags & RT6_LOOKUP_F_DST_NOREF) || !list_empty(&rt->rt6i_uncached)) ip6_rt_put(rt); } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg); int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { int err = 0; if (f6i && f6i->fib6_prefsrc.plen) { *saddr = f6i->fib6_prefsrc.addr; } else { struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); } return err; } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); /* * support functions for ND * */ struct fib6_info *rt6_get_dflt_router(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned char nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); const struct rt6_info *rt6 = NULL; if (dst) rt6 = container_of(dst, struct rt6_info, dst); return rt6; } /* * Store a destination cache entry in a socket */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif } void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, const struct flowi6 *fl6); static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); return rt->rt6i_flags & RTF_LOCAL; } static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(skb_dst(skb)->dev->mtu); mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } else mtu = dst_mtu(skb_dst(skb)); return mtu; } static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, const struct in6_addr *daddr) { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; else if (unlikely(rt->rt6i_flags & RTF_CACHE)) return &rt->rt6i_dst.addr; else return daddr; } static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { struct fib6_nh *nha, *nhb; if (a->nh || b->nh) return nexthop_cmp(a->nh, b->nh); nha = a->fib6_nh; nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { struct inet6_dev *idev; unsigned int mtu; if (dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; } mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) mtu = idev->cnf.mtu6; rcu_read_unlock(); out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr); struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, const void *daddr); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_H #define BLK_MQ_H #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/srcu.h> struct blk_mq_tags; struct blk_flush_queue; /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device */ struct blk_mq_hw_ctx { struct { /** @lock: Protects the dispatch list. */ spinlock_t lock; /** * @dispatch: Used for requests that are ready to be * dispatched to the hardware but for some reason (e.g. lack of * resources) could not be sent to the hardware. As soon as the * driver can send new requests, requests at this list will * be sent first for a fairer dispatch. */ struct list_head dispatch; /** * @state: BLK_MQ_S_* flags. Defines the state of the hw * queue (active, scheduled to restart, stopped). */ unsigned long state; } ____cacheline_aligned_in_smp; /** * @run_work: Used for scheduling a hardware queue run at a later time. */ struct delayed_work run_work; /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; /** * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU * selection from @cpumask. */ int next_cpu; /** * @next_cpu_batch: Counter of how many works left in the batch before * changing to the next CPU. */ int next_cpu_batch; /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ unsigned long flags; /** * @sched_data: Pointer owned by the IO scheduler attached to a request * queue. It's up to the IO scheduler how to use this pointer. */ void *sched_data; /** * @queue: Pointer to the request queue that owns this hardware context. */ struct request_queue *queue; /** @fq: Queue of requests that need to perform a flush operation. */ struct blk_flush_queue *fq; /** * @driver_data: Pointer to data owned by the block driver that created * this hctx */ void *driver_data; /** * @ctx_map: Bitmap for each software queue. If bit is on, there is a * pending request in that software queue. */ struct sbitmap ctx_map; /** * @dispatch_from: Software queue to be used when no scheduler was * selected. */ struct blk_mq_ctx *dispatch_from; /** * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to * decide if the hw_queue is busy using Exponential Weighted Moving * Average algorithm. */ unsigned int dispatch_busy; /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; /** * @dispatch_wait: Waitqueue to put requests when there is no tag * available at the moment, to wait for another try in the future. */ wait_queue_entry_t dispatch_wait; /** * @wait_index: Index of next available dispatch_wait queue to insert * requests. */ atomic_t wait_index; /** * @tags: Tags owned by the block driver. A tag at this set is only * assigned when a request is dispatched from a hardware queue. */ struct blk_mq_tags *tags; /** * @sched_tags: Tags owned by I/O scheduler. If there is an I/O * scheduler associated with a request queue, a tag is assigned when * that request is allocated. Else, this member is not used. */ struct blk_mq_tags *sched_tags; /** @queued: Number of queued requests. */ unsigned long queued; /** @run: Number of dispatched requests. */ unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 /** @dispatched: Number of dispatch requests by queue. */ unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; /** * @nr_active: Number of active requests. Only used when a tag set is * shared across request queues. */ atomic_t nr_active; /** * @elevator_queued: Number of queued requests on hctx. */ atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; /** @kobj: Kernel object for sysfs. */ struct kobject kobj; /** @poll_considered: Count times blk_poll() was called. */ unsigned long poll_considered; /** @poll_invoked: Count how many requests blk_poll() polled. */ unsigned long poll_invoked; /** @poll_success: Count how many polled requests were completed. */ unsigned long poll_success; #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named * as cpu<cpu_number>. */ struct dentry *debugfs_dir; /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif /** * @hctx_list: if this hctx is not in use, this is an entry in * q->unused_hctx_list. */ struct list_head hctx_list; /** * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also * blk_mq_hw_ctx_size(). */ struct srcu_struct srcu[]; }; /** * struct blk_mq_queue_map - Map software queues to hardware queues * @mq_map: CPU ID to hardware queue index map. This is an array * with nr_cpu_ids elements. Each element has a value in the range * [@queue_offset, @queue_offset + @nr_queues). * @nr_queues: Number of hardware queues to map CPU IDs onto. * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe * driver to map each hardware queue type (enum hctx_type) onto a distinct * set of hardware queues. */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; unsigned int queue_offset; }; /** * enum hctx_type - Type of hardware queue * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. * @HCTX_TYPE_READ: Just for READ I/O. * @HCTX_TYPE_POLL: Polled I/O of any kind. * @HCTX_MAX_TYPES: Number of types of hctx. */ enum hctx_type { HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; /** * struct blk_mq_tag_set - tag set that can be shared between request queues * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the * same size, and it's perfectly legal to share maps between * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag * allocations. * @cmd_size: Number of additional bytes to allocate per request. The block * driver owns these additional bytes. * @numa_node: NUMA node the storage adapter has been connected to. * @timeout: Request processing timeout in jiffies. * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. * @active_queues_shared_sbitmap: * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. */ struct blk_mq_tag_set { struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; unsigned int cmd_size; int numa_node; unsigned int timeout; unsigned int flags; void *driver_data; atomic_t active_queues_shared_sbitmap; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; struct mutex tag_list_lock; struct list_head tag_list; }; /** * struct blk_mq_queue_data - Data about a request inserted in a queue * * @rq: Request pointer. * @last: If it is the last request in the queue. */ struct blk_mq_queue_data { struct request *rq; bool last; }; typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** * struct blk_mq_ops - Callback functions that implements block driver * behaviour. */ struct blk_mq_ops { /** * @queue_rq: Queue a new request from block IO. */ blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); /** * @commit_rqs: If a driver uses bd->last to judge when to submit * requests to hardware, it must define this function. In case of errors * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ void (*commit_rqs)(struct blk_mq_hw_ctx *); /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ bool (*get_budget)(struct request_queue *); /** * @put_budget: Release the reserved budget. */ void (*put_budget)(struct request_queue *); /** * @timeout: Called on request timeout. */ enum blk_eh_timer_return (*timeout)(struct request *, bool); /** * @poll: Called to poll for completion of a specific tag. */ int (*poll)(struct blk_mq_hw_ctx *); /** * @complete: Mark the request as complete. */ void (*complete)(struct request *); /** * @init_hctx: Called when the block layer side of a hardware queue has * been set up, allowing the driver to allocate/init matching * structures. */ int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); /** * @exit_hctx: Ditto for exit/teardown. */ void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); /** * @init_request: Called for every command allocated by the block layer * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. */ int (*init_request)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); /** * @exit_request: Ditto for exit/teardown. */ void (*exit_request)(struct blk_mq_tag_set *set, struct request *, unsigned int); /** * @initialize_rq_fn: Called from inside blk_get_request(). */ void (*initialize_rq_fn)(struct request *rq); /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ void (*cleanup_rq)(struct request *); /** * @busy: If set, returns whether or not this queue currently is busy. */ bool (*busy)(struct request_queue *); /** * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ int (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); #endif }; enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, /* hw queue is inactive after all its CPUs become offline */ BLK_MQ_S_INACTIVE = 3, BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_CPU_WORK_BATCH = 8, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) #define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init); struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, unsigned int set_flags); void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_queue_inflight(struct request_queue *q); enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, }; u32 blk_mq_unique_tag(struct request *rq); static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) { return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; } static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) { return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } /** * blk_mq_rq_state() - read the current MQ_RQ_* state of a request * @rq: target request. */ static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) { return READ_ONCE(rq->state); } static inline int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; } static inline int blk_mq_request_completed(struct request *rq) { return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; } void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); bool __blk_should_fake_timeout(struct request_queue *q); static inline bool blk_should_fake_timeout(struct request_queue *q) { if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) return __blk_should_fake_timeout(q); return false; } /** * blk_mq_rq_from_pdu - cast a PDU to a request * @pdu: the PDU (Protocol Data Unit) to be casted * * Return: request * * Driver command data is immediately after the request. So subtract request * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } /** * blk_mq_rq_to_pdu - cast a request to a PDU * @rq: the request to be casted * * Return: pointer to the PDU * * Driver command data is immediately after the request. So add request to get * the PDU. */ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ for ((i) = 0; (i) < (q)->nr_hw_queues && \ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) { if (rq->tag != -1) return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | BLK_QC_T_INTERNAL; } static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) rq->q->mq_ops->cleanup_rq(rq); } blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0 */ /* * sysctl.h: General linux system control interface * * Begun 24 March 1995, Stephen Tweedie * **************************************************************** **************************************************************** ** ** WARNING: ** The values in this file are exported to user space via ** the sysctl() binary interface. Do *NOT* change the ** numbering of any existing values here, and do not change ** any numbers within any one set of values. If you have to ** redefine an existing interface, use a new number for it. ** The kernel will then return -ENOTDIR to any application using ** the old binary interface. ** **************************************************************** **************************************************************** */ #ifndef _LINUX_SYSCTL_H #define _LINUX_SYSCTL_H #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/wait.h> #include <linux/rbtree.h> #include <linux/uidgid.h> #include <uapi/linux/sysctl.h> /* For the /proc/sys support */ struct completion; struct ctl_table; struct nsproxy; struct ctl_table_root; struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ #define SYSCTL_ZERO ((void *)&sysctl_vals[0]) #define SYSCTL_ONE ((void *)&sysctl_vals[1]) #define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) extern const int sysctl_vals[]; typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, size_t *, loff_t *); int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_do_static_key(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* * Register a set of sysctl names by calling register_sysctl_table * with an initialised array of struct ctl_table's. An entry with * NULL procname terminates the table. table->de will be * set up by the registration and need not be initialised in advance. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. * * The table's mode will be honoured for proc-fs access. * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * * The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * * There must be a proc_handler routine for any terminal nodes * mirrored under /proc/sys (non-terminals are handled by a built-in * directory handler). Several default handlers are available to * cover common cases. */ /* Support for userspace poll() to watch for changes */ struct ctl_table_poll { atomic_t event; wait_queue_head_t wait; }; static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) { return (void *)(unsigned long)atomic_read(&poll->event); } #define __CTL_TABLE_POLL_INITIALIZER(name) { \ .event = ATOMIC_INIT(0), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } #define DEFINE_CTL_TABLE_POLL(name) \ struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; umode_t mode; struct ctl_table *child; /* Deprecated */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; void *extra1; void *extra2; } __randomize_layout; struct ctl_node { struct rb_node node; struct ctl_table_header *header; }; /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header { union { struct { struct ctl_table *ctl_table; int used; int count; int nreg; }; struct rcu_head rcu; }; struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ }; struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; struct rb_root root; }; struct ctl_table_set { int (*is_seen)(struct ctl_table_set *); struct ctl_dir dir; }; struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root); void (*set_ownership)(struct ctl_table_header *head, struct ctl_table *table, kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; /* struct ctl_path describes where in the hierarchy a table is added */ struct ctl_path { const char *procname; }; #ifdef CONFIG_SYSCTL void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); void do_sysctl_args(void); extern int pwrsw_enabled; extern int unaligned_enabled; extern int unaligned_dump_stack; extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; extern struct ctl_table random_table[]; extern struct ctl_table firmware_config_table[]; extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { return NULL; } static inline struct ctl_table_header *register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { return NULL; } static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) { return NULL; } static inline void unregister_sysctl_table(struct ctl_table_header * table) { } static inline void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { } static inline void do_sysctl_args(void) { } #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Scatterlist Cryptographic API. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2002 David S. Miller (davem@redhat.com) * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> * * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> * and Nettle, by Niels Möller. */ #ifndef _LINUX_CRYPTO_H #define _LINUX_CRYPTO_H #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/bug.h> #include <linux/refcount.h> #include <linux/slab.h> #include <linux/completion.h> /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing * arbitrary modules to be loaded. Loading from userspace may still need the * unprefixed names, so retains those aliases as well. * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3 * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro * expands twice on the same line. Instead, use a separate base name for the * alias. */ #define MODULE_ALIAS_CRYPTO(name) \ __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_crypto, "crypto-" name) /* * Algorithm masks and types. */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a #define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_HASH 0x0000000e #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 #define CRYPTO_ALG_DYING 0x00000040 #define CRYPTO_ALG_ASYNC 0x00000080 /* * Set if the algorithm (or an algorithm which it uses) requires another * algorithm of the same type to handle corner cases. */ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 /* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered * to have passed. */ #define CRYPTO_ALG_TESTED 0x00000400 /* * Set if the algorithm is an instance that is built from templates. */ #define CRYPTO_ALG_INSTANCE 0x00000800 /* Set this bit if the algorithm provided is hardware accelerated but * not available to userspace via instruction set or so. */ #define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 /* * Mark a cipher as a service implementation only usable by another * cipher and never by a normal user of the kernel crypto API */ #define CRYPTO_ALG_INTERNAL 0x00002000 /* * Set if the algorithm has a ->setkey() method but can be used without * calling it first, i.e. there is a default key. */ #define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 /* * Don't trigger module loading */ #define CRYPTO_NOLOAD 0x00008000 /* * The algorithm may allocate memory during request processing, i.e. during * encryption, decryption, or hashing. Users can request an algorithm with this * flag unset if they can't handle memory allocation failures. * * This flag is currently only implemented for algorithms of type "skcipher", * "aead", "ahash", "shash", and "cipher". Algorithms of other types might not * have this flag set even if they allocate memory. * * In some edge cases, algorithms can allocate memory regardless of this flag. * To avoid these cases, users must obey the following usage constraints: * skcipher: * - The IV buffer and all scatterlist elements must be aligned to the * algorithm's alignmask. * - If the data were to be divided into chunks of size * crypto_skcipher_walksize() (with any remainder going at the end), no * chunk can cross a page boundary or a scatterlist element boundary. * aead: * - The IV buffer and all scatterlist elements must be aligned to the * algorithm's alignmask. * - The first scatterlist element must contain all the associated data, * and its pages must be !PageHighMem. * - If the plaintext/ciphertext were to be divided into chunks of size * crypto_aead_walksize() (with the remainder going at the end), no chunk * can cross a page boundary or a scatterlist element boundary. * ahash: * - The result buffer must be aligned to the algorithm's alignmask. * - crypto_ahash_finup() must not be used unless the algorithm implements * ->finup() natively. */ #define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000 /* * Transform masks and values (for crt_flags). */ #define CRYPTO_TFM_NEED_KEY 0x00000001 #define CRYPTO_TFM_REQ_MASK 0x000fff00 #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 /* * Miscellaneous stuff. */ #define CRYPTO_MAX_ALG_NAME 128 /* * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual * declaration) is used to ensure that the crypto_tfm context structure is * aligned correctly for the given architecture so that there are no alignment * faults for C data types. On architectures that support non-cache coherent * DMA, such as ARM or arm64, it also takes into account the minimal alignment * that is required to ensure that the context struct member does not share any * cachelines with the rest of the struct. This is needed to ensure that cache * maintenance for non-coherent DMA (cache invalidation in particular) does not * affect data that may be accessed by the CPU concurrently. */ #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN #define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) struct scatterlist; struct crypto_async_request; struct crypto_tfm; struct crypto_type; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); /** * DOC: Block Cipher Context Data Structures * * These data structures define the operating context for each block cipher * type. */ struct crypto_async_request { struct list_head list; crypto_completion_t complete; void *data; struct crypto_tfm *tfm; u32 flags; }; /** * DOC: Block Cipher Algorithm Definitions * * These data structures define modular crypto algorithm implementations, * managed via crypto_register_alg() and crypto_unregister_alg(). */ /** * struct cipher_alg - single-block symmetric ciphers definition * @cia_min_keysize: Minimum key size supported by the transformation. This is * the smallest key length supported by this transformation * algorithm. This must be set to one of the pre-defined * values as this is not hardware specific. Possible values * for this field can be found via git grep "_MIN_KEY_SIZE" * include/crypto/ * @cia_max_keysize: Maximum key size supported by the transformation. This is * the largest key length supported by this transformation * algorithm. This must be set to one of the pre-defined values * as this is not hardware specific. Possible values for this * field can be found via git grep "_MAX_KEY_SIZE" * include/crypto/ * @cia_setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. This function * can be called multiple times during the existence of the * transformation object, so one must make sure the key is properly * reprogrammed into the hardware. This function is also * responsible for checking the key length for validity. * @cia_encrypt: Encrypt a single block. This function is used to encrypt a * single block of data, which must be @cra_blocksize big. This * always operates on a full @cra_blocksize and it is not possible * to encrypt a block of smaller size. The supplied buffers must * therefore also be at least of @cra_blocksize size. Both the * input and output buffers are always aligned to @cra_alignmask. * In case either of the input or output buffer supplied by user * of the crypto API is not aligned to @cra_alignmask, the crypto * API will re-align the buffers. The re-alignment means that a * new buffer will be allocated, the data will be copied into the * new buffer, then the processing will happen on the new buffer, * then the data will be copied back into the original buffer and * finally the new buffer will be freed. In case a software * fallback was put in place in the @cra_init call, this function * might need to use the fallback if the algorithm doesn't support * all of the key sizes. In case the key was stored in * transformation context, the key might need to be re-programmed * into the hardware in this function. This function shall not * modify the transformation context, as this function may be * called in parallel with the same transformation object. * @cia_decrypt: Decrypt a single block. This is a reverse counterpart to * @cia_encrypt, and the conditions are exactly the same. * * All fields are mandatory and must be filled. */ struct cipher_alg { unsigned int cia_min_keysize; unsigned int cia_max_keysize; int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; /** * struct compress_alg - compression/decompression algorithm * @coa_compress: Compress a buffer of specified length, storing the resulting * data in the specified buffer. Return the length of the * compressed data in dlen. * @coa_decompress: Decompress the source buffer, storing the uncompressed * data in the specified buffer. The length of the data is * returned in dlen. * * All fields are mandatory. */ struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); }; #ifdef CONFIG_CRYPTO_STATS /* * struct crypto_istat_aead - statistics for AEAD algorithm * @encrypt_cnt: number of encrypt requests * @encrypt_tlen: total data size handled by encrypt requests * @decrypt_cnt: number of decrypt requests * @decrypt_tlen: total data size handled by decrypt requests * @err_cnt: number of error for AEAD requests */ struct crypto_istat_aead { atomic64_t encrypt_cnt; atomic64_t encrypt_tlen; atomic64_t decrypt_cnt; atomic64_t decrypt_tlen; atomic64_t err_cnt; }; /* * struct crypto_istat_akcipher - statistics for akcipher algorithm * @encrypt_cnt: number of encrypt requests * @encrypt_tlen: total data size handled by encrypt requests * @decrypt_cnt: number of decrypt requests * @decrypt_tlen: total data size handled by decrypt requests * @verify_cnt: number of verify operation * @sign_cnt: number of sign requests * @err_cnt: number of error for akcipher requests */ struct crypto_istat_akcipher { atomic64_t encrypt_cnt; atomic64_t encrypt_tlen; atomic64_t decrypt_cnt; atomic64_t decrypt_tlen; atomic64_t verify_cnt; atomic64_t sign_cnt; atomic64_t err_cnt; }; /* * struct crypto_istat_cipher - statistics for cipher algorithm * @encrypt_cnt: number of encrypt requests * @encrypt_tlen: total data size handled by encrypt requests * @decrypt_cnt: number of decrypt requests * @decrypt_tlen: total data size handled by decrypt requests * @err_cnt: number of error for cipher requests */ struct crypto_istat_cipher { atomic64_t encrypt_cnt; atomic64_t encrypt_tlen; atomic64_t decrypt_cnt; atomic64_t decrypt_tlen; atomic64_t err_cnt; }; /* * struct crypto_istat_compress - statistics for compress algorithm * @compress_cnt: number of compress requests * @compress_tlen: total data size handled by compress requests * @decompress_cnt: number of decompress requests * @decompress_tlen: total data size handled by decompress requests * @err_cnt: number of error for compress requests */ struct crypto_istat_compress { atomic64_t compress_cnt; atomic64_t compress_tlen; atomic64_t decompress_cnt; atomic64_t decompress_tlen; atomic64_t err_cnt; }; /* * struct crypto_istat_hash - statistics for has algorithm * @hash_cnt: number of hash requests * @hash_tlen: total data size hashed * @err_cnt: number of error for hash requests */ struct crypto_istat_hash { atomic64_t hash_cnt; atomic64_t hash_tlen; atomic64_t err_cnt; }; /* * struct crypto_istat_kpp - statistics for KPP algorithm * @setsecret_cnt: number of setsecrey operation * @generate_public_key_cnt: number of generate_public_key operation * @compute_shared_secret_cnt: number of compute_shared_secret operation * @err_cnt: number of error for KPP requests */ struct crypto_istat_kpp { atomic64_t setsecret_cnt; atomic64_t generate_public_key_cnt; atomic64_t compute_shared_secret_cnt; atomic64_t err_cnt; }; /* * struct crypto_istat_rng: statistics for RNG algorithm * @generate_cnt: number of RNG generate requests * @generate_tlen: total data size of generated data by the RNG * @seed_cnt: number of times the RNG was seeded * @err_cnt: number of error for RNG requests */ struct crypto_istat_rng { atomic64_t generate_cnt; atomic64_t generate_tlen; atomic64_t seed_cnt; atomic64_t err_cnt; }; #endif /* CONFIG_CRYPTO_STATS */ #define cra_cipher cra_u.cipher #define cra_compress cra_u.compress /** * struct crypto_alg - definition of a cryptograpic cipher algorithm * @cra_flags: Flags describing this transformation. See include/linux/crypto.h * CRYPTO_ALG_* flags for the flags which go in here. Those are * used for fine-tuning the description of the transformation * algorithm. * @cra_blocksize: Minimum block size of this transformation. The size in bytes * of the smallest possible unit which can be transformed with * this algorithm. The users must respect this value. * In case of HASH transformation, it is possible for a smaller * block than @cra_blocksize to be passed to the crypto API for * transformation, in case of any other transformation type, an * error will be returned upon any attempt to transform smaller * than @cra_blocksize chunks. * @cra_ctxsize: Size of the operational context of the transformation. This * value informs the kernel crypto API about the memory size * needed to be allocated for the transformation context. * @cra_alignmask: Alignment mask for the input and output data buffer. The data * buffer containing the input data for the algorithm must be * aligned to this alignment mask. The data buffer for the * output data must be aligned to this alignment mask. Note that * the Crypto API will do the re-alignment in software, but * only under special conditions and there is a performance hit. * The re-alignment happens at these occasions for different * @cra_u types: cipher -- For both input data and output data * buffer; ahash -- For output hash destination buf; shash -- * For output hash destination buf. * This is needed on hardware which is flawed by design and * cannot pick data from arbitrary addresses. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest * @cra_priority. * @cra_name: Generic name (usable by multiple implementations) of the * transformation algorithm. This is the name of the transformation * itself. This field is used by the kernel when looking up the * providers of particular transformation. * @cra_driver_name: Unique name of the transformation provider. This is the * name of the provider of the transformation. This can be any * arbitrary value, but in the usual case, this contains the * name of the chip or provider and the name of the * transformation algorithm. * @cra_type: Type of the cryptographic transformation. This is a pointer to * struct crypto_type, which implements callbacks common for all * transformation types. There are multiple options, such as * &crypto_skcipher_type, &crypto_ahash_type, &crypto_rng_type. * This field might be empty. In that case, there are no common * callbacks. This is the case for: cipher, compress, shash. * @cra_u: Callbacks implementing the transformation. This is a union of * multiple structures. Depending on the type of transformation selected * by @cra_type and @cra_flags above, the associated structure must be * filled with callbacks. This field might be empty. This is the case * for ahash, shash. * @cra_init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @cra_exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @cra_init, used to remove various changes set in * @cra_init. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_u.compress: Union member which contains a (de)compression algorithm. * See @struct @compress_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE * @cra_list: internally used * @cra_users: internally used * @cra_refcnt: internally used * @cra_destroy: internally used * * @stats: union of all possible crypto_istat_xxx structures * @stats.aead: statistics for AEAD algorithm * @stats.akcipher: statistics for akcipher algorithm * @stats.cipher: statistics for cipher algorithm * @stats.compress: statistics for compress algorithm * @stats.hash: statistics for hash algorithm * @stats.rng: statistics for rng algorithm * @stats.kpp: statistics for KPP algorithm * * The struct crypto_alg describes a generic Crypto API algorithm and is common * for all of the transformations. Any variable not documented here shall not * be used by a cipher implementation as it is internal to the Crypto API. */ struct crypto_alg { struct list_head cra_list; struct list_head cra_users; u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; int cra_priority; refcount_t cra_refcnt; char cra_name[CRYPTO_MAX_ALG_NAME]; char cra_driver_name[CRYPTO_MAX_ALG_NAME]; const struct crypto_type *cra_type; union { struct cipher_alg cipher; struct compress_alg compress; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); void (*cra_exit)(struct crypto_tfm *tfm); void (*cra_destroy)(struct crypto_alg *alg); struct module *cra_module; #ifdef CONFIG_CRYPTO_STATS union { struct crypto_istat_aead aead; struct crypto_istat_akcipher akcipher; struct crypto_istat_cipher cipher; struct crypto_istat_compress compress; struct crypto_istat_hash hash; struct crypto_istat_rng rng; struct crypto_istat_kpp kpp; } stats; #endif /* CONFIG_CRYPTO_STATS */ } CRYPTO_MINALIGN_ATTR; #ifdef CONFIG_CRYPTO_STATS void crypto_stats_init(struct crypto_alg *alg); void crypto_stats_get(struct crypto_alg *alg); void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg); void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg); void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg); void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg); void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg); void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg); void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg); void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg); void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret); void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret); void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret); void crypto_stats_rng_seed(struct crypto_alg *alg, int ret); void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret); void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); #else static inline void crypto_stats_init(struct crypto_alg *alg) {} static inline void crypto_stats_get(struct crypto_alg *alg) {} static inline void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) {} static inline void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) {} static inline void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) {} static inline void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) {} static inline void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) {} static inline void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) {} static inline void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) {} static inline void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) {} static inline void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret) {} static inline void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) {} static inline void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) {} #endif /* * A helper struct for waiting for completion of async crypto ops */ struct crypto_wait { struct completion completion; int err; }; /* * Macro for declaring a crypto op async wait object on stack */ #define DECLARE_CRYPTO_WAIT(_wait) \ struct crypto_wait _wait = { \ COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 } /* * Async ops completion helper functioons */ void crypto_req_done(struct crypto_async_request *req, int err); static inline int crypto_wait_req(int err, struct crypto_wait *wait) { switch (err) { case -EINPROGRESS: case -EBUSY: wait_for_completion(&wait->completion); reinit_completion(&wait->completion); err = wait->err; break; } return err; } static inline void crypto_init_wait(struct crypto_wait *wait) { init_completion(&wait->completion); } /* * Algorithm registration interface. */ int crypto_register_alg(struct crypto_alg *alg); void crypto_unregister_alg(struct crypto_alg *alg); int crypto_register_algs(struct crypto_alg *algs, int count); void crypto_unregister_algs(struct crypto_alg *algs, int count); /* * Algorithm query interface. */ int crypto_has_alg(const char *name, u32 type, u32 mask); /* * Transforms: user-instantiated objects which encapsulate algorithms * and core processing logic. Managed via crypto_alloc_*() and * crypto_free_*(), as well as the various helpers below. */ struct crypto_tfm { u32 crt_flags; int node; void (*exit)(struct crypto_tfm *tfm); struct crypto_alg *__crt_alg; void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_cipher { struct crypto_tfm base; }; struct crypto_comp { struct crypto_tfm base; }; enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, CRYPTOA_TYPE, CRYPTOA_U32, __CRYPTOA_MAX, }; #define CRYPTOA_MAX (__CRYPTOA_MAX - 1) /* Maximum number of (rtattr) parameters for each template. */ #define CRYPTO_MAX_ATTRS 32 struct crypto_attr_alg { char name[CRYPTO_MAX_ALG_NAME]; }; struct crypto_attr_type { u32 type; u32 mask; }; struct crypto_attr_u32 { u32 num; }; /* * Transform user interface. */ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm); static inline void crypto_free_tfm(struct crypto_tfm *tfm) { return crypto_destroy_tfm(tfm, tfm); } int alg_test(const char *driver, const char *alg, u32 type, u32 mask); /* * Transform helpers which query the underlying algorithm. */ static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_name; } static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_driver_name; } static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_priority; } static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } static inline unsigned int crypto_tfm_alg_blocksize(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_blocksize; } static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_alignmask; } static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) { return tfm->crt_flags; } static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags) { tfm->crt_flags |= flags; } static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) { tfm->crt_flags &= ~flags; } static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return tfm->__crt_ctx; } static inline unsigned int crypto_tfm_ctx_alignment(void) { struct crypto_tfm *tfm; return __alignof__(tfm->__crt_ctx); } /** * DOC: Single Block Cipher API * * The single block cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). * * Using the single block cipher API calls, operations with the basic cipher * primitive can be implemented. These cipher primitives exclude any block * chaining operations including IV handling. * * The purpose of this single block cipher API is to support the implementation * of templates or other concepts that only need to perform the cipher operation * on one block at a time. Templates invoke the underlying cipher primitive * block-wise and process either the input or the output data of these cipher * operations. */ static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) { return (struct crypto_cipher *)tfm; } /** * crypto_alloc_cipher() - allocate single block cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for a single block cipher. The returned struct * crypto_cipher is the cipher handle that is required for any subsequent API * invocation for that single block cipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); } static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) { return &tfm->base; } /** * crypto_free_cipher() - zeroize and free the single block cipher handle * @tfm: cipher handle to be freed */ static inline void crypto_free_cipher(struct crypto_cipher *tfm) { crypto_free_tfm(crypto_cipher_tfm(tfm)); } /** * crypto_has_cipher() - Search for the availability of a single block cipher * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Return: true when the single block cipher is known to the kernel crypto API; * false otherwise */ static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_has_alg(alg_name, type, mask); } /** * crypto_cipher_blocksize() - obtain block size for cipher * @tfm: cipher handle * * The block size for the single block cipher referenced with the cipher handle * tfm is returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) { return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); } static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) { return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); } static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) { return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); } static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); } static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); } /** * crypto_cipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the single block cipher referenced by the * cipher handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_cipher_setkey(struct crypto_cipher *tfm, const u8 *key, unsigned int keylen); /** * crypto_cipher_encrypt_one() - encrypt one block of plaintext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the ciphertext * @src: buffer holding the plaintext to be encrypted * * Invoke the encryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); /** * crypto_cipher_decrypt_one() - decrypt one block of ciphertext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the plaintext * @src: buffer holding the ciphertext to be decrypted * * Invoke the decryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) { return (struct crypto_comp *)tfm; } static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_COMPRESS; mask |= CRYPTO_ALG_TYPE_MASK; return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask)); } static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm) { return &tfm->base; } static inline void crypto_free_comp(struct crypto_comp *tfm) { crypto_free_tfm(crypto_comp_tfm(tfm)); } static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_COMPRESS; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_has_alg(alg_name, type, mask); } static inline const char *crypto_comp_name(struct crypto_comp *tfm) { return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); } int crypto_comp_compress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); int crypto_comp_decompress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); #endif /* _LINUX_CRYPTO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC802154_DRIVER_OPS #define __MAC802154_DRIVER_OPS #include <linux/types.h> #include <linux/rtnetlink.h> #include <net/mac802154.h> #include "ieee802154_i.h" #include "trace.h" static inline int drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb) { return local->ops->xmit_async(&local->hw, skb); } static inline int drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) { might_sleep(); return local->ops->xmit_sync(&local->hw, skb); } static inline int drv_start(struct ieee802154_local *local) { int ret; might_sleep(); trace_802154_drv_start(local); local->started = true; smp_mb(); ret = local->ops->start(&local->hw); trace_802154_drv_return_int(local, ret); return ret; } static inline void drv_stop(struct ieee802154_local *local) { might_sleep(); trace_802154_drv_stop(local); local->ops->stop(&local->hw); trace_802154_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ tasklet_disable(&local->tasklet); tasklet_enable(&local->tasklet); barrier(); local->started = false; } static inline int drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) { int ret; might_sleep(); trace_802154_drv_set_channel(local, page, channel); ret = local->ops->set_channel(&local->hw, page, channel); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) { int ret; might_sleep(); if (!local->ops->set_txpower) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_tx_power(local, mbm); ret = local->ops->set_txpower(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_cca_mode(struct ieee802154_local *local, const struct wpan_phy_cca *cca) { int ret; might_sleep(); if (!local->ops->set_cca_mode) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_cca_mode(local, cca); ret = local->ops->set_cca_mode(&local->hw, cca); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) { int ret; might_sleep(); if (!local->ops->set_lbt) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_lbt_mode(local, mode); ret = local->ops->set_lbt(&local->hw, mode); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) { int ret; might_sleep(); if (!local->ops->set_cca_ed_level) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_cca_ed_level(local, mbm); ret = local->ops->set_cca_ed_level(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.pan_id = pan_id; trace_802154_drv_set_pan_id(local, pan_id); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_PANID_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.ieee_addr = extended_addr; trace_802154_drv_set_extended_addr(local, extended_addr); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_IEEEADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.short_addr = short_addr; trace_802154_drv_set_short_addr(local, short_addr); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_SADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.pan_coord = is_coord; trace_802154_drv_set_pan_coord(local, is_coord); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_PANC_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be, u8 max_csma_backoffs) { int ret; might_sleep(); if (!local->ops->set_csma_params) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_csma_params(local, min_be, max_be, max_csma_backoffs); ret = local->ops->set_csma_params(&local->hw, min_be, max_be, max_csma_backoffs); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries) { int ret; might_sleep(); if (!local->ops->set_frame_retries) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_max_frame_retries(local, max_frame_retries); ret = local->ops->set_frame_retries(&local->hw, max_frame_retries); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) { int ret; might_sleep(); if (!local->ops->set_promiscuous_mode) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_promiscuous_mode(local, on); ret = local->ops->set_promiscuous_mode(&local->hw, on); trace_802154_drv_return_int(local, ret); return ret; } #endif /* __MAC802154_DRIVER_OPS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Hash algorithms. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_HASH_H #define _CRYPTO_INTERNAL_HASH_H #include <crypto/algapi.h> #include <crypto/hash.h> struct ahash_request; struct scatterlist; struct crypto_hash_walk { char *data; unsigned int offset; unsigned int alignmask; struct page *pg; unsigned int entrylen; unsigned int total; struct scatterlist *sg; unsigned int flags; }; struct ahash_instance { void (*free)(struct ahash_instance *inst); union { struct { char head[offsetof(struct ahash_alg, halg.base)]; struct crypto_instance base; } s; struct ahash_alg alg; }; }; struct shash_instance { void (*free)(struct shash_instance *inst); union { struct { char head[offsetof(struct shash_alg, base)]; struct crypto_instance base; } s; struct shash_alg alg; }; }; struct crypto_ahash_spawn { struct crypto_spawn base; }; struct crypto_shash_spawn { struct crypto_spawn base; }; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } int crypto_register_ahash(struct ahash_alg *alg); void crypto_unregister_ahash(struct ahash_alg *alg); int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); bool crypto_shash_alg_has_setkey(struct shash_alg *alg); static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_ahash(struct crypto_ahash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct hash_alg_common *crypto_spawn_ahash_alg( struct crypto_ahash_spawn *spawn) { return __crypto_hash_alg_common(spawn->base.alg); } int crypto_register_shash(struct shash_alg *alg); void crypto_unregister_shash(struct shash_alg *alg); int crypto_register_shashes(struct shash_alg *algs, int count); void crypto_unregister_shashes(struct shash_alg *algs, int count); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst); void shash_free_singlespawn_instance(struct shash_instance *inst); int crypto_grab_shash(struct crypto_shash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct shash_alg *crypto_spawn_shash_alg( struct crypto_shash_spawn *spawn) { return __crypto_shash_alg(spawn->base.alg); } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) { return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, halg); } static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { tfm->reqsize = reqsize; } static inline struct crypto_instance *ahash_crypto_instance( struct ahash_instance *inst) { return &inst->s.base; } static inline struct ahash_instance *ahash_instance( struct crypto_instance *inst) { return container_of(inst, struct ahash_instance, s.base); } static inline struct ahash_instance *ahash_alg_instance( struct crypto_ahash *ahash) { return ahash_instance(crypto_tfm_alg_instance(&ahash->base)); } static inline void *ahash_instance_ctx(struct ahash_instance *inst) { return crypto_instance_ctx(ahash_crypto_instance(inst)); } static inline void ahash_request_complete(struct ahash_request *req, int err) { req->base.complete(&req->base, err); } static inline u32 ahash_request_flags(struct ahash_request *req) { return req->base.flags; } static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { return crypto_enqueue_request(queue, &request->base); } static inline struct ahash_request *ahash_dequeue_request( struct crypto_queue *queue) { return ahash_request_cast(crypto_dequeue_request(queue)); } static inline void *crypto_shash_ctx(struct crypto_shash *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline struct crypto_instance *shash_crypto_instance( struct shash_instance *inst) { return &inst->s.base; } static inline struct shash_instance *shash_instance( struct crypto_instance *inst) { return container_of(inst, struct shash_instance, s.base); } static inline struct shash_instance *shash_alg_instance( struct crypto_shash *shash) { return shash_instance(crypto_tfm_alg_instance(&shash->base)); } static inline void *shash_instance_ctx(struct shash_instance *inst) { return crypto_instance_ctx(shash_crypto_instance(inst)); } static inline struct crypto_shash *crypto_spawn_shash( struct crypto_shash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) { return crypto_tfm_ctx_aligned(&tfm->base); } static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_shash, base); } #endif /* _CRYPTO_INTERNAL_HASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MSR_H #define _ASM_X86_MSR_H #include "msr-index.h" #ifndef __ASSEMBLY__ #include <asm/asm.h> #include <asm/errno.h> #include <asm/cpumask.h> #include <uapi/asm/msr.h> struct msr { union { struct { u32 l; u32 h; }; u64 q; }; }; struct msr_info { u32 msr_no; struct msr reg; struct msr *msrs; int err; }; struct msr_regs_info { u32 *regs; int err; }; struct saved_msr { bool valid; struct msr_info info; }; struct saved_msrs { unsigned int num; struct saved_msr *array; }; /* * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" * constraint has different meanings. For i386, "A" means exactly * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, * it means rax *or* rdx. */ #ifdef CONFIG_X86_64 /* Using 64-bit values saves one instruction clearing the high half of low */ #define DECLARE_ARGS(val, low, high) unsigned long low, high #define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif /* * Be very careful with includes. This header is prone to include loops. */ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> #ifdef CONFIG_TRACEPOINTS DECLARE_TRACEPOINT(read_msr); DECLARE_TRACEPOINT(write_msr); DECLARE_TRACEPOINT(rdpmc); extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); #else static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif /* * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR * accessors and should not have any tracing or other functionality piggybacking * on them - those are *purely* for accessing MSRs and nothing more. So don't even * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr\n" "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) : : "c" (msr), "a"(low), "d" (high) : "memory"); } #define native_rdmsr(msr, val1, val2) \ do { \ u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ __wrmsr((msr), (u32)((u64)(val)), \ (u32)((u64)(val) >> 32)) static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; val = __rdmsr(msr); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; } static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { DECLARE_ARGS(val, low, high); asm volatile("2: rdmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" "3: mov %[fault],%[err]\n\t" "xorl %%eax, %%eax\n\t" "xorl %%edx, %%edx\n\t" "jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), [fault] "i" (-EIO)); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } /* Can be uninlined because referenced by paravirt */ static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { __wrmsr(msr, low, high); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) { int err; asm volatile("2: wrmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" "3: mov %[fault],%[err] ; jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); /** * rdtsc() - returns the current TSC without ordering constraints * * rdtsc() returns the result of RDTSC as a 64-bit integer. The * only ordering constraint it supplies is the ordering implied by * "asm volatile": it will put the RDTSC in the place you expect. The * CPU can and will speculatively execute that RDTSC, though, so the * results can be non-monotonic if compared on different CPUs. */ static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); return EAX_EDX_VAL(val, low, high); } /** * rdtsc_ordered() - read the current TSC in program order * * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. * It is ordered like a load to a global in-memory counter. It should * be impossible to observe non-monotonic rdtsc_unordered() behavior * across multiple CPUs as long as the TSC is synced. */ static __always_inline unsigned long long rdtsc_ordered(void) { DECLARE_ARGS(val, low, high); /* * The RDTSC instruction is not ordered relative to memory * access. The Intel SDM and the AMD APM are both vague on this * point, but empirically an RDTSC instruction can be * speculatively executed before prior loads. An RDTSC * immediately after an appropriate barrier appears to be * ordered as a normal load, that is, it provides the same * ordering guarantees as reading from a global memory location * that some other imaginary CPU is updating continuously with a * time stamp. * * Thus, use the preferred barrier on the respective CPU, aiming for * RDTSCP as the default. */ asm volatile(ALTERNATIVE_2("rdtsc", "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, "rdtscp", X86_FEATURE_RDTSCP) : EAX_EDX_RET(val, low, high) /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ :: "ecx"); return EAX_EDX_VAL(val, low, high); } static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using * pointer indirection), this allows gcc to optimize better */ #define rdmsr(msr, low, high) \ do { \ u64 __val = native_read_msr((msr)); \ (void)((low) = (u32)__val); \ (void)((high) = (u32)(__val >> 32)); \ } while (0) static inline void wrmsr(unsigned int msr, u32 low, u32 high) { native_write_msr(msr, low, high); } #define rdmsrl(msr, val) \ ((val) = native_read_msr((msr))) static inline void wrmsrl(unsigned int msr, u64 val) { native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); } /* wrmsr with exception handling */ static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) { return native_write_msr_safe(msr, low, high); } /* rdmsr with exception handling */ #define rdmsr_safe(msr, low, high) \ ({ \ int __err; \ u64 __val = native_read_msr_safe((msr), &__err); \ (*low) = (u32)__val; \ (*high) = (u32)(__val >> 32); \ __err; \ }) static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) { int err; *p = native_read_msr_safe(msr, &err); return err; } #define rdpmc(counter, low, high) \ do { \ u64 _l = native_read_pmc((counter)); \ (low) = (u32)_l; \ (high) = (u32)(_l >> 32); \ } while (0) #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) #endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): */ static inline int wrmsrl_safe(u32 msr, u64 val) { return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); } #define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) #define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { rdmsr(msr_no, *l, *h); return 0; } static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { wrmsr(msr_no, l, h); return 0; } static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { rdmsrl(msr_no, *q); return 0; } static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { wrmsrl(msr_no, q); return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { return rdmsr_safe(msr_no, l, h); } static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { return rdmsrl_safe(msr_no, q); } static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { return wrmsrl_safe(msr_no, q); } static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return rdmsr_safe_regs(regs); } static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_MSR_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI sockets. */ #include <linux/compat.h> #include <linux/export.h> #include <linux/utsname.h> #include <linux/sched.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> #include <net/bluetooth/mgmt.h> #include "mgmt_util.h" static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); static DEFINE_IDA(sock_cookie_ida); static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ /* Socket info */ #define hci_pi(sk) ((struct hci_pinfo *) sk) struct hci_pinfo { struct bt_sock bt; struct hci_dev *hdev; struct hci_filter filter; __u8 cmsg_mask; unsigned short channel; unsigned long flags; __u32 cookie; char comm[TASK_COMM_LEN]; }; static struct hci_dev *hci_hdev_from_sock(struct sock *sk) { struct hci_dev *hdev = hci_pi(sk)->hdev; if (!hdev) return ERR_PTR(-EBADFD); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) return ERR_PTR(-EPIPE); return hdev; } void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); } void hci_sock_clear_flag(struct sock *sk, int nr) { clear_bit(nr, &hci_pi(sk)->flags); } int hci_sock_test_flag(struct sock *sk, int nr) { return test_bit(nr, &hci_pi(sk)->flags); } unsigned short hci_sock_get_channel(struct sock *sk) { return hci_pi(sk)->channel; } u32 hci_sock_get_cookie(struct sock *sk) { return hci_pi(sk)->cookie; } static bool hci_sock_gen_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (!id) { id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); if (id < 0) id = 0xffffffff; hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); return true; } return false; } static void hci_sock_free_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (id) { hci_pi(sk)->cookie = 0xffffffff; ida_simple_remove(&sock_cookie_ida, id); } } static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ #define HCI_SFLT_MAX_OGF 5 struct hci_sec_filter { __u32 type_mask; __u32 event_mask[2]; __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; }; static const struct hci_sec_filter hci_sec_filter = { /* Packet types */ 0x10, /* Events */ { 0x1000d9fe, 0x0000b00c }, /* Commands */ { { 0x0 }, /* OGF_LINK_CTL */ { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; static struct bt_sock_list hci_sk_list = { .lock = __RW_LOCK_UNLOCKED(hci_sk_list.lock) }; static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) { struct hci_filter *flt; int flt_type, flt_event; /* Apply filter */ flt = &hci_pi(sk)->filter; flt_type = hci_skb_pkt_type(skb) & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; /* Extra filter for event packets only */ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) return false; flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); if (!hci_test_bit(flt_event, &flt->event_mask)) return true; /* Check filter only when opcode is set */ if (!flt->opcode) return false; if (flt_event == HCI_EV_CMD_COMPLETE && flt->opcode != get_unaligned((__le16 *)(skb->data + 3))) return true; if (flt_event == HCI_EV_CMD_STATUS && flt->opcode != get_unaligned((__le16 *)(skb->data + 4))) return true; return false; } /* Send frame to RAW socket */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) continue; /* Don't send frame to the socket it came from */ if (skb->sk == sk) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { if (!bt_cb(skb)->incoming) continue; if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; } else { /* Don't send frame to other channel types */ continue; } if (!skb_copy) { /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; /* Put type byte before the data */ memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } read_unlock(&hci_sk_list.lock); kfree_skb(skb_copy); } /* Send frame to sockets with specific channel */ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { struct sock *sk; BT_DBG("channel %u len %d", channel, skb->len); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; if (sk->sk_state != BT_BOUND) continue; if (hci_pi(sk)->channel != channel) continue; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } } void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { read_lock(&hci_sk_list.lock); __hci_send_to_channel(channel, skb, flag, skip_sk); read_unlock(&hci_sk_list.lock); } /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sk_buff *skb_copy = NULL; struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) return; BT_DBG("hdev %p len %d", hdev, skb->len); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: opcode = cpu_to_le16(HCI_MON_COMMAND_PKT); break; case HCI_EVENT_PKT: opcode = cpu_to_le16(HCI_MON_EVENT_PKT); break; case HCI_ACLDATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ACL_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ACL_TX_PKT); break; case HCI_SCODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_SCO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT); break; case HCI_ISODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ISO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ISO_TX_PKT); break; case HCI_DIAG_PKT: opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG); break; default: return; } /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); if (!skb_copy) return; /* Put header before the data */ hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb_copy); } void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk) { struct sock *sk; __le16 index; if (hdev) index = cpu_to_le16(hdev->id); else index = cpu_to_le16(MGMT_INDEX_NONE); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct hci_mon_hdr *hdr; struct sk_buff *skb; if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) continue; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); if (!skb) continue; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(event, skb_put(skb, 2)); if (data) skb_put_data(skb, data, data_len); skb->tstamp = tstamp; hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; struct hci_mon_new_index *ni; struct hci_mon_index_info *ii; struct sk_buff *skb; __le16 opcode; switch (event) { case HCI_DEV_REG: skb = bt_skb_alloc(HCI_MON_NEW_INDEX_SIZE, GFP_ATOMIC); if (!skb) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy(ni->name, hdev->name, 8); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; case HCI_DEV_UNREG: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_DEL_INDEX); break; case HCI_DEV_SETUP: if (hdev->manufacturer == 0xffff) return NULL; fallthrough; case HCI_DEV_UP: skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); if (!skb) return NULL; ii = skb_put(skb, HCI_MON_INDEX_INFO_SIZE); bacpy(&ii->bdaddr, &hdev->bdaddr); ii->manufacturer = cpu_to_le16(hdev->manufacturer); opcode = cpu_to_le16(HCI_MON_INDEX_INFO); break; case HCI_DEV_OPEN: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_OPEN_INDEX); break; case HCI_DEV_CLOSE: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX); break; default: return NULL; } __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; u16 format; u8 ver[3]; u32 flags; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: format = 0x0000; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_USER: format = 0x0001; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(format, skb_put(skb, 2)); skb_put_data(skb, ver, sizeof(ver)); put_unaligned_le32(flags, skb_put(skb, 4)); skb_put_u8(skb, TASK_COMM_LEN); skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, u16 opcode, u16 len, const void *buf) { struct hci_mon_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(6 + len, GFP_ATOMIC); if (!skb) return NULL; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) skb_put_data(skb, buf, len); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { size_t len; struct hci_mon_hdr *hdr; struct sk_buff *skb; va_list args; va_start(args, fmt); len = vsnprintf(NULL, 0, fmt, args); va_end(args); skb = bt_skb_alloc(len + 1, GFP_ATOMIC); if (!skb) return; va_start(args, fmt); vsprintf(skb_put(skb, len), fmt, args); *(u8 *)skb_put(skb, 1) = 0; va_end(args); __net_timestamp(skb); hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { struct sk_buff *skb; skb = create_monitor_event(hdev, HCI_DEV_REG); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (!test_bit(HCI_RUNNING, &hdev->flags)) continue; skb = create_monitor_event(hdev, HCI_DEV_OPEN); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (test_bit(HCI_UP, &hdev->flags)) skb = create_monitor_event(hdev, HCI_DEV_UP); else if (hci_dev_test_flag(hdev, HCI_SETUP)) skb = create_monitor_event(hdev, HCI_DEV_SETUP); else skb = NULL; if (skb) { if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } } read_unlock(&hci_dev_list_lock); } static void send_monitor_control_replay(struct sock *mon_sk) { struct sock *sk; read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; skb = create_monitor_ctrl_open(sk); if (!skb) continue; if (sock_queue_rcv_skb(mon_sk, skb)) kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { struct hci_event_hdr *hdr; struct hci_ev_stack_internal *ev; struct sk_buff *skb; skb = bt_skb_alloc(HCI_EVENT_HDR_SIZE + sizeof(*ev) + dlen, GFP_ATOMIC); if (!skb) return; hdr = skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; ev = skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; __net_timestamp(skb); hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_send_to_sock(hdev, skb); kfree_skb(skb); } void hci_sock_dev_event(struct hci_dev *hdev, int event) { BT_DBG("hdev %s event %d", hdev->name, event); if (atomic_read(&monitor_promisc)) { struct sk_buff *skb; /* Send event to monitor */ skb = create_monitor_event(hdev, event); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (event <= HCI_DEV_DOWN) { struct hci_ev_si_device ev; /* Send event to sockets */ ev.event = event; ev.dev_id = hdev->id; hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev); } if (event == HCI_DEV_UNREG) { struct sock *sk; /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { if (hci_pi(sk)->hdev == hdev) { sk->sk_err = EPIPE; sk->sk_state_change(sk); } } read_unlock(&hci_sk_list.lock); } } static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; list_for_each_entry(c, &mgmt_chan_list, list) { if (c->channel == channel) return c; } return NULL; } static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; mutex_lock(&mgmt_chan_list_lock); c = __hci_mgmt_chan_find(channel); mutex_unlock(&mgmt_chan_list_lock); return c; } int hci_mgmt_chan_register(struct hci_mgmt_chan *c) { if (c->channel < HCI_CHANNEL_CONTROL) return -EINVAL; mutex_lock(&mgmt_chan_list_lock); if (__hci_mgmt_chan_find(c->channel)) { mutex_unlock(&mgmt_chan_list_lock); return -EALREADY; } list_add_tail(&c->list, &mgmt_chan_list); mutex_unlock(&mgmt_chan_list_lock); return 0; } EXPORT_SYMBOL(hci_mgmt_chan_register); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c) { mutex_lock(&mgmt_chan_list_lock); list_del(&c->list); mutex_unlock(&mgmt_chan_list_lock); } EXPORT_SYMBOL(hci_mgmt_chan_unregister); static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_free_cookie(sk); break; } bt_sock_unlink(&hci_sk_list, sk); hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will * be released and the controller brought back down. * * The checking of HCI_AUTO_OFF is not needed in this * case since it will have been cleared already when * opening the user channel. */ hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); } atomic_dec(&hdev->promisc); hci_dev_put(hdev); } sock_orphan(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); release_sock(sk); sock_put(sk); return 0; } static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } /* Ioctls that require bound socket */ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { struct hci_dev *hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; if (hdev->dev_type != HCI_PRIMARY) return -EOPNOTSUPP; switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) return -EPERM; return -EOPNOTSUPP; case HCIGETCONNINFO: return hci_get_conn_info(hdev, (void __user *)arg); case HCIGETAUTHINFO: return hci_get_auth_info(hdev, (void __user *)arg); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_blacklist_add(hdev, (void __user *)arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *)arg); } return -ENOIOCTLCMD; } static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; int err; BT_DBG("cmd %x arg %lx", cmd, arg); lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } /* When calling an ioctl on an unbound raw socket, then ensure * that the monitor gets informed. Ensure that the resulting event * is only send once by checking if the cookie exists or not. The * socket cookie will be only ever generated once for the lifetime * of a given socket. */ if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } release_sock(sk); switch (cmd) { case HCIGETDEVLIST: return hci_get_dev_list(argp); case HCIGETDEVINFO: return hci_get_dev_info(argp); case HCIGETCONNLIST: return hci_get_conn_list(argp); case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: case HCISETAUTH: case HCISETENCRYPT: case HCISETPTYPE: case HCISETLINKPOL: case HCISETLINKMODE: case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: return hci_inquiry(argp); } lock_sock(sk); err = hci_sock_bound_ioctl(sk, cmd, arg); done: release_sock(sk); return err; } #ifdef CONFIG_COMPAT static int hci_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { switch (cmd) { case HCIDEVUP: case HCIDEVDOWN: case HCIDEVRESET: case HCIDEVRESTAT: return hci_sock_ioctl(sock, cmd, arg); } return hci_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); if (!addr) return -EINVAL; memset(&haddr, 0, sizeof(haddr)); len = min_t(unsigned int, sizeof(haddr), addr_len); memcpy(&haddr, addr, len); if (haddr.hci_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); /* Allow detaching from dead device and attaching to alive device, if * the caller wants to re-bind (instead of close) this socket in * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. */ hdev = hci_pi(sk)->hdev; if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { hci_pi(sk)->hdev = NULL; sk->sk_state = BT_OPEN; hci_dev_put(hdev); } hdev = NULL; if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; } switch (haddr.hci_channel) { case HCI_CHANNEL_RAW: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } atomic_inc(&hdev->promisc); } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * then there has been already an ioctl issued against * an unbound socket and with that triggerd an open * notification. Send a close notification first to * allow the state transition to bounded. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } break; case HCI_CHANNEL_USER: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev == HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } if (test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && test_bit(HCI_UP, &hdev->flags))) { err = -EBUSY; hci_dev_put(hdev); goto done; } if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) { err = -EUSERS; hci_dev_put(hdev); goto done; } mgmt_index_removed(hdev); err = hci_dev_open(hdev->id); if (err) { if (err == -EALREADY) { /* In case the transport is already up and * running, clear the error here. * * This can happen when opening a user * channel and HCI_AUTO_OFF grace period * is still active. */ err = 0; } else { hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; } } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * this socket will transition from a raw socket into * a user channel socket. For a clean transition, send * the close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* The user channel is restricted to CAP_NET_ADMIN * capabilities and with that implicitly trusted. */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_RAW)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); send_monitor_note(sk, "Bluetooth subsystem version %u.%u", BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; case HCI_CHANNEL_LOGGING: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; break; default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } /* Users with CAP_NET_ADMIN capabilities are allowed * access to all management commands and events. For * untrusted users the interface is restricted and * also only untrusted events are sent. */ if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->channel = haddr.hci_channel; /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They * however might be cleared later and then sending of these * events will be disabled, but that is then intentional. * * This also enables generic events that are safe to be * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been * assigned, this socket will transtion from * a raw socket into a control socket. To * allow for a clean transtion, send the * close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } sk->sk_state = BT_BOUND; done: release_sock(sk); return err; } static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; struct hci_dev *hdev; int err = 0; BT_DBG("sock %p sk %p", sock, sk); if (peer) return -EOPNOTSUPP; lock_sock(sk); hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto done; } haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; haddr->hci_channel= hci_pi(sk)->channel; err = sizeof(*haddr); done: release_sock(sk); return err; } static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { __u8 mask = hci_pi(sk)->cmsg_mask; if (mask & HCI_CMSG_DIR) { int incoming = bt_cb(skb)->incoming; put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } if (mask & HCI_CMSG_TSTAMP) { #ifdef CONFIG_COMPAT struct old_timeval32 ctv; #endif struct __kernel_old_timeval tv; void *data; int len; skb_get_timestamp(skb, &tv); data = &tv; len = sizeof(tv); #ifdef CONFIG_COMPAT if (!COMPAT_USE_64BIT_TIME && (msg->msg_flags & MSG_CMSG_COMPAT)) { ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; data = &ctv; len = sizeof(ctv); } #endif put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; unsigned int skblen; BT_DBG("sock %p, sk %p", sock, sk); if (flags & MSG_OOB) return -EOPNOTSUPP; if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) return -EOPNOTSUPP; if (sk->sk_state == BT_CLOSED) return 0; skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) return err; skblen = skb->len; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: hci_sock_cmsg(sk, msg, skb); break; case HCI_CHANNEL_USER: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); break; default: if (hci_mgmt_chan_find(hci_pi(sk)->channel)) sock_recv_timestamp(msg, sk, skb); break; } skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; } static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, struct msghdr *msg, size_t msglen) { void *buf; u8 *cp; struct mgmt_hdr *hdr; u16 opcode, index, len; struct hci_dev *hdev = NULL; const struct hci_mgmt_handler *handler; bool var_len, no_hdev; int err; BT_DBG("got %zu bytes", msglen); if (msglen < sizeof(*hdr)) return -EINVAL; buf = kmalloc(msglen, GFP_KERNEL); if (!buf) return -ENOMEM; if (memcpy_from_msg(buf, msg, msglen)) { err = -EFAULT; goto done; } hdr = buf; opcode = __le16_to_cpu(hdr->opcode); index = __le16_to_cpu(hdr->index); len = __le16_to_cpu(hdr->len); if (len != msglen - sizeof(*hdr)) { err = -EINVAL; goto done; } if (chan->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; /* Send event to monitor */ skb = create_monitor_ctrl_command(sk, index, opcode, len, buf + sizeof(*hdr)); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_UNKNOWN_COMMAND); goto done; } handler = &chan->handlers[opcode]; if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) && !(handler->flags & HCI_MGMT_UNTRUSTED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_PERMISSION_DENIED); goto done; } if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !(handler->flags & HCI_MGMT_UNCONFIGURED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } if (!(handler->flags & HCI_MGMT_HDEV_OPTIONAL)) { no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); if (no_hdev != !hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } var_len = (handler->flags & HCI_MGMT_VAR_LEN); if ((var_len && len < handler->data_len) || (!var_len && len != handler->data_len)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_PARAMS); goto done; } if (hdev && chan->hdev_init) chan->hdev_init(sk, hdev); cp = buf + sizeof(*hdr); err = handler->func(sk, hdev, cp, len); if (err < 0) goto done; err = msglen; done: if (hdev) hci_dev_put(hdev); kfree(buf); return err; } static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) { struct hci_mon_hdr *hdr; struct sk_buff *skb; struct hci_dev *hdev; u16 index; int err; /* The logging frame consists at minimum of the standard header, * the priority byte, the ident length byte and at least one string * terminator NUL byte. Anything shorter are invalid packets. */ if (len < sizeof(*hdr) + 3) return -EINVAL; skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) return err; if (memcpy_from_msg(skb_put(skb, len), msg, len)) { err = -EFAULT; goto drop; } hdr = (void *)skb->data; if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { err = -EINVAL; goto drop; } if (__le16_to_cpu(hdr->opcode) == 0x0000) { __u8 priority = skb->data[sizeof(*hdr)]; __u8 ident_len = skb->data[sizeof(*hdr) + 1]; /* Only the priorities 0-7 are valid and with that any other * value results in an invalid packet. * * The priority byte is followed by an ident length byte and * the NUL terminated ident string. Check that the ident * length is not overflowing the packet and also that the * ident string itself is NUL terminated. In case the ident * length is zero, the length value actually doubles as NUL * terminator identifier. * * The message follows the ident string (if present) and * must be NUL terminated. Otherwise it is not a valid packet. */ if (priority > 7 || skb->data[len - 1] != 0x00 || ident_len > len - sizeof(*hdr) - 3 || skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { err = -EINVAL; goto drop; } } else { err = -EINVAL; goto drop; } index = __le16_to_cpu(hdr->index); if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) { err = -ENODEV; goto drop; } } else { hdev = NULL; } hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); err = len; if (hdev) hci_dev_put(hdev); drop: kfree_skb(skb); return err; } static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct hci_mgmt_chan *chan; struct hci_dev *hdev; struct sk_buff *skb; int err; BT_DBG("sock %p sk %p", sock, sk); if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| MSG_CMSG_COMPAT)) return -EINVAL; if (len < 4 || len > HCI_MAX_FRAME_SIZE) return -EINVAL; lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: break; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; case HCI_CHANNEL_LOGGING: err = hci_logging_frame(sk, msg, len); goto done; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); if (chan) err = hci_mgmt_cmd(chan, sk, msg, len); else err = -EINVAL; mutex_unlock(&mgmt_chan_list_lock); goto done; } hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto done; } if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; goto done; } skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto done; if (memcpy_from_msg(skb_put(skb, len), msg, len)) { err = -EFAULT; goto drop; } hci_skb_pkt_type(skb) = skb->data[0]; skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* No permission check is needed for user channel * since that gets enforced when binding the socket. * * However check that the packet type is valid. */ if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); if (((ogf > HCI_SFLT_MAX_OGF) || !hci_test_bit(ocf & HCI_FLT_OCF_BITS, &hci_sec_filter.ocf_mask[ogf])) && !capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } /* Since the opcode has already been extracted here, store * a copy of the value for later use by the drivers. */ hci_skb_opcode(skb) = opcode; if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { /* Stand-alone HCI commands must be flagged as * single-command requests. */ bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } } else { if (!capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } err = len; done: release_sock(sk); return err; drop: kfree_skb(skb); goto done; } static int hci_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int len) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; int err = 0, opt = 0; BT_DBG("sk %p, opt %d", sk, optname); if (level != SOL_HCI) return -ENOPROTOOPT; lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: if (copy_from_sockptr(&opt, optval, sizeof(opt))) { err = -EFAULT; break; } if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_DIR; break; case HCI_TIME_STAMP: if (copy_from_sockptr(&opt, optval, sizeof(opt))) { err = -EFAULT; break; } if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_TSTAMP; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } len = min_t(unsigned int, len, sizeof(uf)); if (copy_from_sockptr(&uf, optval, len)) { err = -EFAULT; break; } if (!capable(CAP_NET_RAW)) { uf.type_mask &= hci_sec_filter.type_mask; uf.event_mask[0] &= *((u32 *) hci_sec_filter.event_mask + 0); uf.event_mask[1] &= *((u32 *) hci_sec_filter.event_mask + 1); } { struct hci_filter *f = &hci_pi(sk)->filter; f->type_mask = uf.type_mask; f->opcode = uf.opcode; *((u32 *) f->event_mask + 0) = uf.event_mask[0]; *((u32 *) f->event_mask + 1) = uf.event_mask[1]; } break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct hci_ufilter uf; struct sock *sk = sock->sk; int len, opt, err = 0; BT_DBG("sk %p, opt %d", sk, optname); if (level != SOL_HCI) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_DIR) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_TIME_STAMP: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_TSTAMP) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } len = min_t(unsigned int, len, sizeof(uf)); if (copy_to_user(optval, &uf, len)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, .bind = hci_sock_bind, .getname = hci_sock_getname, .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hci_sock_compat_ioctl, #endif .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, .getsockopt = hci_sock_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .mmap = sock_no_mmap }; static struct proto hci_sk_proto = { .name = "HCI", .owner = THIS_MODULE, .obj_size = sizeof(struct hci_pinfo) }; static int hci_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->ops = &hci_sock_ops; sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = protocol; sock->state = SS_UNCONNECTED; sk->sk_state = BT_OPEN; bt_sock_link(&hci_sk_list, sk); return 0; } static const struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hci_sock_create, }; int __init hci_sock_init(void) { int err; BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); if (err < 0) { BT_ERR("HCI socket registration failed"); goto error; } err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create HCI proc file"); bt_sock_unregister(BTPROTO_HCI); goto error; } BT_INFO("HCI socket layer initialized"); return 0; error: proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "hci"); bt_sock_unregister(BTPROTO_HCI); proto_unregister(&hci_sk_proto); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_MLD_H #define LINUX_MLD_H #include <linux/in6.h> #include <linux/icmpv6.h> /* MLDv1 Query/Report/Done */ struct mld_msg { struct icmp6hdr mld_hdr; struct in6_addr mld_mca; }; #define mld_type mld_hdr.icmp6_type #define mld_code mld_hdr.icmp6_code #define mld_cksum mld_hdr.icmp6_cksum #define mld_maxdelay mld_hdr.icmp6_maxdelay #define mld_reserved mld_hdr.icmp6_dataun.un_data16[1] /* Multicast Listener Discovery version 2 headers */ /* MLDv2 Report */ struct mld2_grec { __u8 grec_type; __u8 grec_auxwords; __be16 grec_nsrcs; struct in6_addr grec_mca; struct in6_addr grec_src[]; }; struct mld2_report { struct icmp6hdr mld2r_hdr; struct mld2_grec mld2r_grec[]; }; #define mld2r_type mld2r_hdr.icmp6_type #define mld2r_resv1 mld2r_hdr.icmp6_code #define mld2r_cksum mld2r_hdr.icmp6_cksum #define mld2r_resv2 mld2r_hdr.icmp6_dataun.un_data16[0] #define mld2r_ngrec mld2r_hdr.icmp6_dataun.un_data16[1] /* MLDv2 Query */ struct mld2_query { struct icmp6hdr mld2q_hdr; struct in6_addr mld2q_mca; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 mld2q_qrv:3, mld2q_suppress:1, mld2q_resv2:4; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 mld2q_resv2:4, mld2q_suppress:1, mld2q_qrv:3; #else #error "Please fix <asm/byteorder.h>" #endif __u8 mld2q_qqic; __be16 mld2q_nsrcs; struct in6_addr mld2q_srcs[]; }; #define mld2q_type mld2q_hdr.icmp6_type #define mld2q_code mld2q_hdr.icmp6_code #define mld2q_cksum mld2q_hdr.icmp6_cksum #define mld2q_mrc mld2q_hdr.icmp6_maxdelay #define mld2q_resv1 mld2q_hdr.icmp6_dataun.un_data16[1] /* RFC3810, 5.1.3. Maximum Response Code: * * If Maximum Response Code >= 32768, Maximum Response Code represents a * floating-point value as follows: * * 0 1 2 3 4 5 6 7 8 9 A B C D E F * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |1| exp | mant | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ #define MLDV2_MRC_EXP(value) (((value) >> 12) & 0x0007) #define MLDV2_MRC_MAN(value) ((value) & 0x0fff) /* RFC3810, 5.1.9. QQIC (Querier's Query Interval Code): * * If QQIC >= 128, QQIC represents a floating-point value as follows: * * 0 1 2 3 4 5 6 7 * +-+-+-+-+-+-+-+-+ * |1| exp | mant | * +-+-+-+-+-+-+-+-+ */ #define MLDV2_QQIC_EXP(value) (((value) >> 4) & 0x07) #define MLDV2_QQIC_MAN(value) ((value) & 0x0f) #define MLD_EXP_MIN_LIMIT 32768UL #define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1) static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2) { /* RFC3810, 5.1.3. Maximum Response Code */ unsigned long ret, mc_mrc = ntohs(mlh2->mld2q_mrc); if (mc_mrc < MLD_EXP_MIN_LIMIT) { ret = mc_mrc; } else { unsigned long mc_man, mc_exp; mc_exp = MLDV2_MRC_EXP(mc_mrc); mc_man = MLDV2_MRC_MAN(mc_mrc); ret = (mc_man | 0x1000) << (mc_exp + 3); } return ret; } #endif
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/file.c * * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes * * Manage the dynamic fd arrays in the process files_struct. */ #include <linux/syscalls.h> #include <linux/export.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/close_range.h> #include <net/sock.h> unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ #define __const_min(x, y) ((x) < (y) ? (x) : (y)) unsigned int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; static void __free_fdtable(struct fdtable *fdt) { kvfree(fdt->fd); kvfree(fdt->open_fds); kfree(fdt); } static void free_fdtable_rcu(struct rcu_head *rcu) { __free_fdtable(container_of(rcu, struct fdtable, rcu)); } #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, unsigned int count) { unsigned int cpy, set; cpy = count / BITS_PER_BYTE; set = (nfdt->max_fds - count) / BITS_PER_BYTE; memcpy(nfdt->open_fds, ofdt->open_fds, cpy); memset((char *)nfdt->open_fds + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)nfdt->close_on_exec + cpy, 0, set); cpy = BITBIT_SIZE(count); set = BITBIT_SIZE(nfdt->max_fds) - cpy; memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); memset((char *)nfdt->full_fds_bits + cpy, 0, set); } /* * Copy all file descriptors from the old table to the new, expanded table and * clear the extra space. Called with the files spinlock held for write. */ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { size_t cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } static struct fdtable * alloc_fdtable(unsigned int nr) { struct fdtable *fdt; void *data; /* * Figure out how many fds we actually want to support in this fdtable. * Allocation steps are keyed to the size of the fdarray, since it * grows far faster than any of the other dynamic data. We try to fit * the fdarray into comfortable page-tuned chunks: starting at 1024B * and growing in powers of two from there on. */ nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); /* * Note that this can drive nr *below* what we had passed if sysctl_nr_open * had been set lower between the check in expand_files() and here. Deal * with that in caller, it's cheaper that way. * * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise * bitmaps handling below becomes unpleasant, to put it mildly... */ if (unlikely(nr > sysctl_nr_open)) nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); if (!fdt) goto out; fdt->max_fds = nr; data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); if (!data) goto out_fdt; fdt->fd = data; data = kvmalloc(max_t(size_t, 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), GFP_KERNEL_ACCOUNT); if (!data) goto out_arr; fdt->open_fds = data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; data += nr / BITS_PER_BYTE; fdt->full_fds_bits = data; return fdt; out_arr: kvfree(fdt->fd); out_fdt: kfree(fdt); out: return NULL; } /* * Expand the file descriptor table. * This function will allocate a new fdtable and both fd array and fdset, of * the given size. * Return <0 error code on error; 1 on successful completion. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_fdtable(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { struct fdtable *new_fdt, *cur_fdt; spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); /* make sure all __fd_install() have seen resize_in_progress * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) synchronize_rcu(); spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; /* * extremely unlikely race - sysctl_nr_open decreased between the check in * caller and alloc_fdtable(). Cheaper to catch it here... */ if (unlikely(new_fdt->max_fds <= nr)) { __free_fdtable(new_fdt); return -EMFILE; } cur_fdt = files_fdtable(files); BUG_ON(nr < cur_fdt->max_fds); copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) call_rcu(&cur_fdt->rcu, free_fdtable_rcu); /* coupled with smp_rmb() in __fd_install() */ smp_wmb(); return 1; } /* * Expand files. * This function will expand the file structures, if the requested size exceeds * the current capacity and there is room for expansion. * Return <0 error code on error; 0 when nothing done; 1 when files were * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { struct fdtable *fdt; int expanded = 0; repeat: fdt = files_fdtable(files); /* Do we need to expand? */ if (nr < fdt->max_fds) return expanded; /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; if (unlikely(files->resize_in_progress)) { spin_unlock(&files->file_lock); expanded = 1; wait_event(files->resize_wait, !files->resize_in_progress); spin_lock(&files->file_lock); goto repeat; } /* All good, so we try */ files->resize_in_progress = true; expanded = expand_fdtable(files, nr); files->resize_in_progress = false; wake_up_all(&files->resize_wait); return expanded; } static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->close_on_exec); } static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt) { if (test_bit(fd, fdt->close_on_exec)) __clear_bit(fd, fdt->close_on_exec); } static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->open_fds); fd /= BITS_PER_LONG; if (!~fdt->open_fds[fd]) __set_bit(fd, fdt->full_fds_bits); } static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { __clear_bit(fd, fdt->open_fds); __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); } static unsigned int count_open_files(struct fdtable *fdt) { unsigned int size = fdt->max_fds; unsigned int i; /* Find the last open fd */ for (i = size / BITS_PER_LONG; i > 0; ) { if (fdt->open_fds[--i]) break; } i = (i + 1) * BITS_PER_LONG; return i; } static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) { unsigned int count; count = count_open_files(fdt); if (max_fds < NR_OPEN_DEFAULT) max_fds = NR_OPEN_DEFAULT; return min(count, max_fds); } /* * Allocate a new files structure and copy contents from the * passed in files structure. * errorp will be valid only when the returned files_struct is NULL. */ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; unsigned int open_files, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); newf->resize_in_progress = false; init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); open_files = sane_fdtable_size(old_fdt, max_fds); /* * Check whether we need to allocate a larger fd array and fd set. */ while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); if (new_fdt != &newf->fdtab) __free_fdtable(new_fdt); new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; goto out_release; } /* beyond sysctl_nr_open; nothing to do */ if (unlikely(new_fdt->max_fds < open_files)) { __free_fdtable(new_fdt); *errorp = -EMFILE; goto out_release; } /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need * the latest pointer. */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); open_files = sane_fdtable_size(old_fdt, max_fds); } copy_fd_bitmaps(new_fdt, old_fdt, open_files); old_fds = old_fdt->fd; new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; if (f) { get_file(f); } else { /* * The fd may be claimed in the fd bitmap but not yet * instantiated in the files array if a sibling thread * is partway through open(). So make sure that this * fd is available to the new process. */ __clear_open_fd(open_files - i, new_fdt); } rcu_assign_pointer(*new_fds++, f); } spin_unlock(&oldf->file_lock); /* clear the remainder */ memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *)); rcu_assign_pointer(newf->fdt, new_fdt); return newf; out_release: kmem_cache_free(files_cachep, newf); out: return NULL; } static struct fdtable *close_files(struct files_struct * files) { /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the * files structure. */ struct fdtable *fdt = rcu_dereference_raw(files->fdt); unsigned int i, j = 0; for (;;) { unsigned long set; i = j * BITS_PER_LONG; if (i >= fdt->max_fds) break; set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); cond_resched(); } } i++; set >>= 1; } } return fdt; } struct files_struct *get_files_struct(struct task_struct *task) { struct files_struct *files; task_lock(task); files = task->files; if (files) atomic_inc(&files->count); task_unlock(task); return files; } void put_files_struct(struct files_struct *files) { if (atomic_dec_and_test(&files->count)) { struct fdtable *fdt = close_files(files); /* free the arrays if they are not embedded */ if (fdt != &files->fdtab) __free_fdtable(fdt); kmem_cache_free(files_cachep, files); } } void reset_files_struct(struct files_struct *files) { struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; task_lock(tsk); tsk->files = files; task_unlock(tsk); put_files_struct(old); } void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; if (files) { task_lock(tsk); tsk->files = NULL; task_unlock(tsk); put_files_struct(files); } } struct files_struct init_files = { .count = ATOMIC_INIT(1), .fdt = &init_files.fdtab, .fdtab = { .max_fds = NR_OPEN_DEFAULT, .fd = &init_files.fd_array[0], .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), }; static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) { unsigned int maxfd = fdt->max_fds; unsigned int maxbit = maxfd / BITS_PER_LONG; unsigned int bitbit = start / BITS_PER_LONG; bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; if (bitbit > maxfd) return maxfd; if (bitbit > start) start = bitbit; return find_next_zero_bit(fdt->open_fds, maxfd, start); } /* * allocate a file descriptor, mark it busy. */ int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags) { unsigned int fd; int error; struct fdtable *fdt; spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); fd = start; if (fd < files->next_fd) fd = files->next_fd; if (fd < fdt->max_fds) fd = find_next_fd(fdt, fd); /* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ error = -EMFILE; if (fd >= end) goto out; error = expand_files(files, fd); if (error < 0) goto out; /* * If we needed to expand the fs array we * might have blocked - try again. */ if (error) goto repeat; if (start <= files->next_fd) files->next_fd = fd + 1; __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); error = fd; #if 1 /* Sanity check */ if (rcu_access_pointer(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } #endif out: spin_unlock(&files->file_lock); return error; } static int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } int __get_unused_fd_flags(unsigned flags, unsigned long nofile) { return __alloc_fd(current->files, 0, nofile, flags); } int get_unused_fd_flags(unsigned flags) { return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); } EXPORT_SYMBOL(get_unused_fd_flags); static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; } void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; spin_lock(&files->file_lock); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); } EXPORT_SYMBOL(put_unused_fd); /* * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file * array. At any such point, we are vulnerable to a dup2() race * installing a file in the array before us. We need to detect this and * fput() the struct file we are about to overwrite in this case. * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. * * NOTE: __fd_install() variant is really, really low-level; don't * use it unless you are forced to by truly lousy API shoved down * your throat. 'files' *MUST* be either current->files or obtained * by get_files_struct(current) done by whoever had given it to you, * or really bad things will happen. Normally you want to use * fd_install() instead. */ void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) { struct fdtable *fdt; rcu_read_lock_sched(); if (unlikely(files->resize_in_progress)) { rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return; } /* coupled with smp_wmb() in expand_fdtable() */ smp_rmb(); fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); rcu_read_unlock_sched(); } /* * This consumes the "file" refcount, so callers should treat it * as if they had called fput(file). */ void fd_install(unsigned int fd, struct file *file) { __fd_install(current->files, fd, file); } EXPORT_SYMBOL(fd_install); static struct file *pick_file(struct files_struct *files, unsigned fd) { struct file *file = NULL; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (fd >= fdt->max_fds) goto out_unlock; file = fdt->fd[fd]; if (!file) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); out_unlock: spin_unlock(&files->file_lock); return file; } /* * The same warnings as for __alloc_fd()/__fd_install() apply here... */ int __close_fd(struct files_struct *files, unsigned fd) { struct file *file; file = pick_file(files, fd); if (!file) return -EBADF; return filp_close(file, files); } EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ /** * __close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. */ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) { unsigned int cur_max; struct task_struct *me = current; struct files_struct *cur_fds = me->files, *fds = NULL; if (flags & ~CLOSE_RANGE_UNSHARE) return -EINVAL; if (fd > max_fd) return -EINVAL; rcu_read_lock(); cur_max = files_fdtable(cur_fds)->max_fds; rcu_read_unlock(); /* cap to last valid index into fdtable */ cur_max--; if (flags & CLOSE_RANGE_UNSHARE) { int ret; unsigned int max_unshare_fds = NR_OPEN_MAX; /* * If the requested range is greater than the current maximum, * we're closing everything so only copy all file descriptors * beneath the lowest file descriptor. */ if (max_fd >= cur_max) max_unshare_fds = fd; ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); if (ret) return ret; /* * We used to share our file descriptor table, and have now * created a private one, make sure we're using it below. */ if (fds) swap(cur_fds, fds); } max_fd = min(max_fd, cur_max); while (fd <= max_fd) { struct file *file; file = pick_file(cur_fds, fd++); if (!file) continue; filp_close(file, cur_fds); cond_resched(); } if (fds) { /* * We're done closing the files we were supposed to. Time to install * the new file descriptor table and drop the old one. */ task_lock(me); me->files = cur_fds; task_unlock(me); put_files_struct(fds); } return 0; } /* * variant of __close_fd that gets a ref on the file for later fput. * The caller must ensure that filp_close() called on the file, and then * an fput(). */ int __close_fd_get_file(unsigned int fd, struct file **res) { struct files_struct *files = current->files; struct file *file; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (fd >= fdt->max_fds) goto out_unlock; file = fdt->fd[fd]; if (!file) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); get_file(file); *res = file; return 0; out_unlock: spin_unlock(&files->file_lock); *res = NULL; return -ENOENT; } void do_close_on_exec(struct files_struct *files) { unsigned i; struct fdtable *fdt; /* exec unshares first */ spin_lock(&files->file_lock); for (i = 0; ; i++) { unsigned long set; unsigned fd = i * BITS_PER_LONG; fdt = files_fdtable(files); if (fd >= fdt->max_fds) break; set = fdt->close_on_exec[i]; if (!set) continue; fdt->close_on_exec[i] = 0; for ( ; set ; fd++, set >>= 1) { struct file *file; if (!(set & 1)) continue; file = fdt->fd[fd]; if (!file) continue; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); spin_lock(&files->file_lock); } } spin_unlock(&files->file_lock); } static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask, unsigned int refs) { struct file *file; rcu_read_lock(); loop: file = fcheck_files(files, fd); if (file) { /* File object ref couldn't be taken. * dup2() atomicity guarantee is the reason * we loop to catch the new file (or NULL pointer) */ if (file->f_mode & mask) file = NULL; else if (!get_file_rcu_many(file, refs)) goto loop; else if (__fcheck_files(files, fd) != file) { fput_many(file, refs); goto loop; } } rcu_read_unlock(); return file; } static inline struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs) { return __fget_files(current->files, fd, mask, refs); } struct file *fget_many(unsigned int fd, unsigned int refs) { return __fget(fd, FMODE_PATH, refs); } struct file *fget(unsigned int fd) { return __fget(fd, FMODE_PATH, 1); } EXPORT_SYMBOL(fget); struct file *fget_raw(unsigned int fd) { return __fget(fd, 0, 1); } EXPORT_SYMBOL(fget_raw); struct file *fget_task(struct task_struct *task, unsigned int fd) { struct file *file = NULL; task_lock(task); if (task->files) file = __fget_files(task->files, fd, 0, 1); task_unlock(task); return file; } /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * * You can use this instead of fget if you satisfy all of the following * conditions: * 1) You must call fput_light before exiting the syscall and returning control * to userspace (i.e. you cannot remember the returned struct file * after * returning to userspace). * 2) You must not call filp_close on the returned struct file * in between * calls to fget_light and fput_light. * 3) You must not clone the current task in between the calls to fget_light * and fput_light. * * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; if (atomic_read(&files->count) == 1) { file = __fcheck_files(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; return (unsigned long)file; } else { file = __fget(fd, mask, 1); if (!file) return 0; return FDPUT_FPUT | (unsigned long)file; } } unsigned long __fdget(unsigned int fd) { return __fget_light(fd, FMODE_PATH); } EXPORT_SYMBOL(__fdget); unsigned long __fdget_raw(unsigned int fd) { return __fget_light(fd, 0); } unsigned long __fdget_pos(unsigned int fd) { unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); if (file && (file->f_mode & FMODE_ATOMIC_POS)) { if (file_count(file) > 1) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } } return v; } void __f_unlock_pos(struct file *f) { mutex_unlock(&f->f_pos_lock); } /* * We only lock f_pos if we have threads or if the file might be * shared with another process. In both cases we'll have an elevated * file count (done either by fdget() or by fork()). */ void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (flag) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); } bool get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; struct fdtable *fdt; bool res; rcu_read_lock(); fdt = files_fdtable(files); res = close_on_exec(fd, fdt); rcu_read_unlock(); return res; } static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) __releases(&files->file_lock) { struct file *tofree; struct fdtable *fdt; /* * We need to detect attempts to do dup2() over allocated but still * not finished descriptor. NB: OpenBSD avoids that at the price of * extra work in their equivalent of fget() - they insert struct * file immediately after grabbing descriptor, mark it larval if * more work (e.g. actual opening) is needed and make sure that * fget() treats larval files as absent. Potentially interesting, * but while extra work in fget() is trivial, locking implications * and amount of surgery on open()-related paths in VFS are not. * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" * deadlocks in rather amusing ways, AFAICS. All of that is out of * scope of POSIX or SUS, since neither considers shared descriptor * tables and this condition does not arise without those. */ fdt = files_fdtable(files); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; get_file(file); rcu_assign_pointer(fdt->fd[fd], file); __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); return fd; Ebusy: spin_unlock(&files->file_lock); return -EBUSY; } int replace_fd(unsigned fd, struct file *file, unsigned flags) { int err; struct files_struct *files = current->files; if (!file) return __close_fd(files, fd); if (fd >= rlimit(RLIMIT_NOFILE)) return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, fd); if (unlikely(err < 0)) goto out_unlock; return do_dup2(files, file, fd, flags); out_unlock: spin_unlock(&files->file_lock); return err; } /** * __receive_fd() - Install received file into file descriptor table * * @fd: fd to install into (if negative, a new fd will be allocated) * @file: struct file that was received from another process * @ufd: __user pointer to write new fd number to * @o_flags: the O_* flags to apply to the new fd entry * * Installs a received file into the file descriptor table, with appropriate * checks and count updates. Optionally writes the fd number to userspace, if * @ufd is non-NULL. * * This helper handles its own reference counting of the incoming * struct file. * * Returns newly install fd or -ve on error. */ int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags) { int new_fd; int error; error = security_file_receive(file); if (error) return error; if (fd < 0) { new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd; } else { new_fd = fd; } if (ufd) { error = put_user(new_fd, ufd); if (error) { if (fd < 0) put_unused_fd(new_fd); return error; } } if (fd < 0) { fd_install(new_fd, get_file(file)); } else { error = replace_fd(new_fd, file, o_flags); if (error) return error; } /* Bump the sock usage counts, if any. */ __receive_sock(file); return new_fd; } static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; struct file *file; struct files_struct *files = current->files; if ((flags & ~O_CLOEXEC) != 0) return -EINVAL; if (unlikely(oldfd == newfd)) return -EINVAL; if (newfd >= rlimit(RLIMIT_NOFILE)) return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, newfd); file = fcheck(oldfd); if (unlikely(!file)) goto Ebadf; if (unlikely(err < 0)) { if (err == -EMFILE) goto Ebadf; goto out_unlock; } return do_dup2(files, file, newfd, flags); Ebadf: err = -EBADF; out_unlock: spin_unlock(&files->file_lock); return err; } SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { return ksys_dup3(oldfd, newfd, flags); } SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; int retval = oldfd; rcu_read_lock(); if (!fcheck_files(files, oldfd)) retval = -EBADF; rcu_read_unlock(); return retval; } return ksys_dup3(oldfd, newfd, 0); } SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); if (file) { ret = get_unused_fd_flags(0); if (ret >= 0) fd_install(ret, file); else fput(file); } return ret; } int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; if (from >= rlimit(RLIMIT_NOFILE)) return -EINVAL; err = alloc_fd(from, flags); if (err >= 0) { get_file(file); fd_install(err, file); } return err; } int iterate_fd(struct files_struct *files, unsigned n, int (*f)(const void *, struct file *, unsigned), const void *p) { struct fdtable *fdt; int res = 0; if (!files) return 0; spin_lock(&files->file_lock); for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { struct file *file; file = rcu_dereference_check_fdtable(files, fdt->fd[n]); if (!file) continue; res = f(p, file, n); if (res) break; } spin_unlock(&files->file_lock); return res; } EXPORT_SYMBOL(iterate_fd);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H #include <linux/ratelimit_types.h> #include <linux/sched.h> #include <linux/spinlock.h> static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { memset(rs, 0, sizeof(*rs)); raw_spin_lock_init(&rs->lock); rs->interval = interval; rs->burst = burst; } static inline void ratelimit_default_init(struct ratelimit_state *rs) { return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); } static inline void ratelimit_state_exit(struct ratelimit_state *rs) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))