1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CHECKSUM_64_H #define _ASM_X86_CHECKSUM_64_H /* * Checksums for x86-64 * Copyright 2002 by Andi Kleen, SuSE Labs * with some code from asm-x86/checksum.h */ #include <linux/compiler.h> #include <linux/uaccess.h> #include <asm/byteorder.h> /** * csum_fold - Fold and invert a 32bit checksum. * sum: 32bit unfolded sum * * Fold a 32bit running checksum to 16bit and invert it. This is usually * the last step before putting a checksum into a packet. * Make sure not to mix with 64bit checksums. */ static inline __sum16 csum_fold(__wsum sum) { asm(" addl %1,%0\n" " adcl $0xffff,%0" : "=r" (sum) : "r" ((__force u32)sum << 16), "0" ((__force u32)sum & 0xffff0000)); return (__force __sum16)(~(__force u32)sum >> 16); } /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by * Arnt Gulbrandsen. */ /** * ip_fast_csum - Compute the IPv4 header checksum efficiently. * iph: ipv4 header * ihl: length of header / 4 */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { unsigned int sum; asm(" movl (%1), %0\n" " subl $4, %2\n" " jbe 2f\n" " addl 4(%1), %0\n" " adcl 8(%1), %0\n" " adcl 12(%1), %0\n" "1: adcl 16(%1), %0\n" " lea 4(%1), %1\n" " decl %2\n" " jne 1b\n" " adcl $0, %0\n" " movl %0, %2\n" " shrl $16, %0\n" " addw %w2, %w0\n" " adcl $0, %0\n" " notl %0\n" "2:" /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=r" (sum), "=r" (iph), "=r" (ihl) : "1" (iph), "2" (ihl) : "memory"); return (__force __sum16)sum; } /** * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: ip protocol of packet * @sum: initial sum to be added in (32bit unfolded) * * Returns the pseudo header checksum the input data. Result is * 32bit unfolded. */ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { asm(" addl %1, %0\n" " adcl %2, %0\n" " adcl %3, %0\n" " adcl $0, %0\n" : "=r" (sum) : "g" (daddr), "g" (saddr), "g" ((len + proto)<<8), "0" (sum)); return sum; } /** * csum_tcpup_magic - Compute an IPv4 pseudo header checksum. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: ip protocol of packet * @sum: initial sum to be added in (32bit unfolded) * * Returns the 16bit pseudo header checksum the input data already * complemented and ready to be filled in. */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } /** * csum_partial - Compute an internet checksum. * @buff: buffer to be checksummed * @len: length of buffer. * @sum: initial sum to be added in (32bit unfolded) * * Returns the 32bit unfolded internet checksum of the buffer. * Before filling it in it needs to be csum_fold()'ed. * buff should be aligned to a 64bit boundary if possible. */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); /* Do not call this directly. Use the wrappers below */ extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len); extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); /** * ip_compute_csum - Compute an 16bit IP checksum. * @buff: buffer address. * @len: length of buffer. * * Returns the 16bit folded/inverted checksum of the passed buffer. * Ready to fill in. */ extern __sum16 ip_compute_csum(const void *buff, int len); /** * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: protocol of packet * @sum: initial sum (32bit unfolded) to be added in * * Computes an IPv6 pseudo header checksum. This sum is added the checksum * into UDP/TCP packets and contains some link layer information. * Returns the unfolded 32bit checksum. */ struct in6_addr; #define _HAVE_ARCH_IPV6_CSUM 1 extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum sum); static inline unsigned add32_with_carry(unsigned a, unsigned b) { asm("addl %2,%0\n\t" "adcl $0,%0" : "=r" (a) : "0" (a), "rm" (b)); return a; } #define HAVE_ARCH_CSUM_ADD static inline __wsum csum_add(__wsum csum, __wsum addend) { return (__force __wsum)add32_with_carry((__force unsigned)csum, (__force unsigned)addend); } #endif /* _ASM_X86_CHECKSUM_64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KBD_KERN_H #define _KBD_KERN_H #include <linux/tty.h> #include <linux/interrupt.h> #include <linux/keyboard.h> extern struct tasklet_struct keyboard_tasklet; extern char *func_table[MAX_NR_FUNC]; extern char func_buf[]; extern char *funcbufptr; extern int funcbufsize, funcbufleft; /* * kbd->xxx contains the VC-local things (flag settings etc..) * * Note: externally visible are LED_SCR, LED_NUM, LED_CAP defined in kd.h * The code in KDGETLED / KDSETLED depends on the internal and * external order being the same. * * Note: lockstate is used as index in the array key_map. */ struct kbd_struct { unsigned char lockstate; /* 8 modifiers - the names do not have any meaning at all; they can be associated to arbitrarily chosen keys */ #define VC_SHIFTLOCK KG_SHIFT /* shift lock mode */ #define VC_ALTGRLOCK KG_ALTGR /* altgr lock mode */ #define VC_CTRLLOCK KG_CTRL /* control lock mode */ #define VC_ALTLOCK KG_ALT /* alt lock mode */ #define VC_SHIFTLLOCK KG_SHIFTL /* shiftl lock mode */ #define VC_SHIFTRLOCK KG_SHIFTR /* shiftr lock mode */ #define VC_CTRLLLOCK KG_CTRLL /* ctrll lock mode */ #define VC_CTRLRLOCK KG_CTRLR /* ctrlr lock mode */ unsigned char slockstate; /* for `sticky' Shift, Ctrl, etc. */ unsigned char ledmode:1; #define LED_SHOW_FLAGS 0 /* traditional state */ #define LED_SHOW_IOCTL 1 /* only change leds upon ioctl */ unsigned char ledflagstate:4; /* flags, not lights */ unsigned char default_ledflagstate:4; #define VC_SCROLLOCK 0 /* scroll-lock mode */ #define VC_NUMLOCK 1 /* numeric lock mode */ #define VC_CAPSLOCK 2 /* capslock mode */ #define VC_KANALOCK 3 /* kanalock mode */ unsigned char kbdmode:3; /* one 3-bit value */ #define VC_XLATE 0 /* translate keycodes using keymap */ #define VC_MEDIUMRAW 1 /* medium raw (keycode) mode */ #define VC_RAW 2 /* raw (scancode) mode */ #define VC_UNICODE 3 /* Unicode mode */ #define VC_OFF 4 /* disabled mode */ unsigned char modeflags:5; #define VC_APPLIC 0 /* application key mode */ #define VC_CKMODE 1 /* cursor key mode */ #define VC_REPEAT 2 /* keyboard repeat */ #define VC_CRLF 3 /* 0 - enter sends CR, 1 - enter sends CRLF */ #define VC_META 4 /* 0 - meta, 1 - meta=prefix with ESC */ }; extern int kbd_init(void); extern void setledstate(struct kbd_struct *kbd, unsigned int led); extern int do_poke_blanked_console; extern void (*kbd_ledfunc)(unsigned int led); extern int set_console(int nr); extern void schedule_console_callback(void); /* FIXME: review locking for vt.c callers */ static inline void set_leds(void) { tasklet_schedule(&keyboard_tasklet); } static inline int vc_kbd_mode(struct kbd_struct * kbd, int flag) { return ((kbd->modeflags >> flag) & 1); } static inline int vc_kbd_led(struct kbd_struct * kbd, int flag) { return ((kbd->ledflagstate >> flag) & 1); } static inline void set_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags |= 1 << flag; } static inline void set_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate |= 1 << flag; } static inline void clr_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags &= ~(1 << flag); } static inline void clr_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate &= ~(1 << flag); } static inline void chg_vc_kbd_lock(struct kbd_struct * kbd, int flag) { kbd->lockstate ^= 1 << flag; } static inline void chg_vc_kbd_slock(struct kbd_struct * kbd, int flag) { kbd->slockstate ^= 1 << flag; } static inline void chg_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags ^= 1 << flag; } static inline void chg_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate ^= 1 << flag; } #define U(x) ((x) ^ 0xf000) #define BRL_UC_ROW 0x2800 /* keyboard.c */ struct console; void compute_shiftstate(void); /* defkeymap.c */ extern unsigned int keymap_count; #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev_domain(async_func_t func, struct device *dev, struct async_domain *domain) { return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __FIRMWARE_LOADER_H #define __FIRMWARE_LOADER_H #include <linux/bitops.h> #include <linux/firmware.h> #include <linux/types.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/completion.h> #include <generated/utsrelease.h> /** * enum fw_opt - options to control firmware loading behaviour * * @FW_OPT_UEVENT: Enables the fallback mechanism to send a kobject uevent * when the firmware is not found. Userspace is in charge to load the * firmware using the sysfs loading facility. * @FW_OPT_NOWAIT: Used to describe the firmware request is asynchronous. * @FW_OPT_USERHELPER: Enable the fallback mechanism, in case the direct * filesystem lookup fails at finding the firmware. For details refer to * firmware_fallback_sysfs(). * @FW_OPT_NO_WARN: Quiet, avoid printing warning messages. * @FW_OPT_NOCACHE: Disables firmware caching. Firmware caching is used to * cache the firmware upon suspend, so that upon resume races against the * firmware file lookup on storage is avoided. Used for calls where the * file may be too big, or where the driver takes charge of its own * firmware caching mechanism. * @FW_OPT_NOFALLBACK_SYSFS: Disable the sysfs fallback mechanism. Takes * precedence over &FW_OPT_UEVENT and &FW_OPT_USERHELPER. * @FW_OPT_FALLBACK_PLATFORM: Enable fallback to device fw copy embedded in * the platform's main firmware. If both this fallback and the sysfs * fallback are enabled, then this fallback will be tried first. * @FW_OPT_PARTIAL: Allow partial read of firmware instead of needing to read * entire file. */ enum fw_opt { FW_OPT_UEVENT = BIT(0), FW_OPT_NOWAIT = BIT(1), FW_OPT_USERHELPER = BIT(2), FW_OPT_NO_WARN = BIT(3), FW_OPT_NOCACHE = BIT(4), FW_OPT_NOFALLBACK_SYSFS = BIT(5), FW_OPT_FALLBACK_PLATFORM = BIT(6), FW_OPT_PARTIAL = BIT(7), }; enum fw_status { FW_STATUS_UNKNOWN, FW_STATUS_LOADING, FW_STATUS_DONE, FW_STATUS_ABORTED, }; /* * Concurrent request_firmware() for the same firmware need to be * serialized. struct fw_state is simple state machine which hold the * state of the firmware loading. */ struct fw_state { struct completion completion; enum fw_status status; }; struct fw_priv { struct kref ref; struct list_head list; struct firmware_cache *fwc; struct fw_state fw_st; void *data; size_t size; size_t allocated_size; size_t offset; u32 opt_flags; #ifdef CONFIG_FW_LOADER_PAGED_BUF bool is_paged_buf; struct page **pages; int nr_pages; int page_array_size; #endif #ifdef CONFIG_FW_LOADER_USER_HELPER bool need_uevent; struct list_head pending_list; #endif const char *fw_name; }; extern struct mutex fw_lock; static inline bool __fw_state_check(struct fw_priv *fw_priv, enum fw_status status) { struct fw_state *fw_st = &fw_priv->fw_st; return fw_st->status == status; } static inline int __fw_state_wait_common(struct fw_priv *fw_priv, long timeout) { struct fw_state *fw_st = &fw_priv->fw_st; long ret; ret = wait_for_completion_killable_timeout(&fw_st->completion, timeout); if (ret != 0 && fw_st->status == FW_STATUS_ABORTED) return -ENOENT; if (!ret) return -ETIMEDOUT; return ret < 0 ? ret : 0; } static inline void __fw_state_set(struct fw_priv *fw_priv, enum fw_status status) { struct fw_state *fw_st = &fw_priv->fw_st; WRITE_ONCE(fw_st->status, status); if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { #ifdef CONFIG_FW_LOADER_USER_HELPER /* * Doing this here ensures that the fw_priv is deleted from * the pending list in all abort/done paths. */ list_del_init(&fw_priv->pending_list); #endif complete_all(&fw_st->completion); } } static inline void fw_state_aborted(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_ABORTED); } static inline bool fw_state_is_aborted(struct fw_priv *fw_priv) { return __fw_state_check(fw_priv, FW_STATUS_ABORTED); } static inline void fw_state_start(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_LOADING); } static inline void fw_state_done(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_DONE); } int assign_fw(struct firmware *fw, struct device *device); #ifdef CONFIG_FW_LOADER_PAGED_BUF void fw_free_paged_buf(struct fw_priv *fw_priv); int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed); int fw_map_paged_buf(struct fw_priv *fw_priv); bool fw_is_paged_buf(struct fw_priv *fw_priv); #else static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {} static inline int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } static inline int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } static inline bool fw_is_paged_buf(struct fw_priv *fw_priv) { return false; } #endif #endif /* __FIRMWARE_LOADER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* user-type.h: User-defined key type * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_USER_TYPE_H #define _KEYS_USER_TYPE_H #include <linux/key.h> #include <linux/rcupdate.h> #ifdef CONFIG_KEYS /*****************************************************************************/ /* * the payload for a key of type "user" or "logon" * - once filled in and attached to a key: * - the payload struct is invariant may not be changed, only replaced * - the payload must be read with RCU procedures or with the key semaphore * held * - the payload may only be replaced with the key semaphore write-locked * - the key's data length is the size of the actual data, not including the * payload wrapper */ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ char data[] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; extern struct key_type key_type_logon; struct key_preparsed_payload; extern int user_preparse(struct key_preparsed_payload *prep); extern void user_free_preparse(struct key_preparsed_payload *prep); extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char *buffer, size_t buflen); static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { return (struct user_key_payload *)dereference_key_rcu(key); } static inline struct user_key_payload *user_key_payload_locked(const struct key *key) { return (struct user_key_payload *)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ #endif /* _KEYS_USER_TYPE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM block #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BLOCK_H #include <linux/blktrace_api.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/tracepoint.h> #define RWBS_LEN 8 DECLARE_EVENT_CLASS(block_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh), TP_STRUCT__entry ( __field( dev_t, dev ) __field( sector_t, sector ) __field( size_t, size ) ), TP_fast_assign( __entry->dev = bh->b_bdev->bd_dev; __entry->sector = bh->b_blocknr; __entry->size = bh->b_size; ), TP_printk("%d,%d sector=%llu size=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->sector, __entry->size ) ); /** * block_touch_buffer - mark a buffer accessed * @bh: buffer_head being touched * * Called from touch_buffer(). */ DEFINE_EVENT(block_buffer, block_touch_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_dirty_buffer - mark a buffer dirty * @bh: buffer_head being dirtied * * Called from mark_buffer_dirty(). */ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_rq_requeue - place block IO request back on a queue * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue * @q. For some reason the request was not completed and needs to be * put back in the queue. */ TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, 0) ); /** * block_rq_complete - block IO operation completed by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If * the @rq->bio is %NULL, then there is absolutely no additional work to * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ TRACE_EVENT(block_rq_complete, TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = error; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_rq_insert - insert block operation request into queue * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted * into queue @q. The fields in the operation request @rq struct can * be examined to determine which device and sectors the pending * operation would access. */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a * device driver for processing. */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_rq_merge - merge request with another one in the elevator * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another * request queued in the elevator. */ DEFINE_EVENT(block_rq, block_rq_merge, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_bio_bounce - used bounce buffer when processing block operation * @q: queue holding the block operation * @bio: block operation * * A bounce buffer was used to handle the block operation @bio in @q. * This occurs when hardware limitations prevent a direct transfer of * data between the @bio data memory area and the IO device. Use of a * bounce buffer requires extra copying of data and decreases * performance. */ TRACE_EVENT(block_bio_bounce, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. */ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_bio_merge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ DEFINE_EVENT(block_bio_merge, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio) ); /** * block_bio_queue - putting new block IO operation in queue * @q: queue holding operation * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio ? bio_dev(bio) : 0; __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_opf : 0, __entry->nr_sector); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_getrq - get a free request entry in queue for block IO operations * @q: queue for operations * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * A request struct for queue @q has been allocated to handle the * block IO operation @bio. */ DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw) ); /** * block_sleeprq - waiting to get a free request entry in queue for block IO operation * @q: queue for operation * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * In the case where a request struct cannot be provided for queue @q * the process needs to wait for an request struct to become * available. This tracepoint event is generated each time the * process goes to sleep waiting for request struct become available. */ DEFINE_EVENT(block_get_rq, block_sleeprq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw) ); /** * block_plug - keep operations requests in request queue * @q: request queue to plug * * Plug the request queue @q. Do not allow block operation requests * to be sent to the device driver. Instead, accumulate requests in * the queue to improve throughput performance of the block device. */ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), TP_ARGS(q), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s]", __entry->comm) ); DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); /** * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit) ); /** * block_split - split a single bio struct into two bio structs * @q: queue containing the bio * @bio: block operation being split * @new_sector: The starting sector for the new bio * * The bio request @bio in request queue @q needs to be split into two * bio requests. The newly created @bio request starts at * @new_sector. This split may be required due to hardware limitation * such as operation crossing device boundaries in a RAID system. */ TRACE_EVENT(block_split, TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int new_sector), TP_ARGS(q, bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, (unsigned long long)__entry->new_sector, __entry->comm) ); /** * block_bio_remap - map request for a logical device to the raw device * @q: queue holding the operation * @bio: revised operation * @dev: device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the * raw block device. */ TRACE_EVENT(block_bio_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, sector_t from), TP_ARGS(q, bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector) ); /** * block_rq_remap - map request for a block operation request * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation * * The block operation request @rq in @q has been remapped. The block * operation request @rq holds the current information and @from hold * the original sector. */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, sector_t from), TP_ARGS(q, rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = disk_devt(rq->rq_disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector, __entry->nr_bios) ); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H struct route_info { __u8 type; __u8 length; __u8 prefix_len; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved_h:3, route_pref:2, reserved_l:3; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved_l:3, route_pref:2, reserved_h:3; #endif __be32 lifetime; __u8 prefix[]; /* 0,8 or 16 */ }; #include <net/addrconf.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> #include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> #include <net/nexthop.h> #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 #define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 #define RT6_LOOKUP_F_DST_NOREF 0x00000080 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header */ #define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values * IPV6_PREFER_SRC_TMP = 0x1 * IPV6_PREFER_SRC_PUBLIC = 0x2 * IPV6_PREFER_SRC_COA = 0x4 * and above RT6_LOOKUP_F_SRCPREF_xxx flags. */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { /* No need to bitmask because srcprefs have only 3 bits. */ return srcprefs << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { return (flags >> 3) & 7; } static inline bool rt6_need_strict(const struct in6_addr *daddr) { return ipv6_addr_type(daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } /* fib entries using a nexthop object can not be coalesced into * a multipath route */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && f6i->fib6_nh->fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags_noref(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); static inline struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { return ip6_route_output_flags(net, sk, fl6, 0); } /* Only conditionally release dst if flags indicates * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list. */ static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) { if (!(flags & RT6_LOOKUP_F_DST_NOREF) || !list_empty(&rt->rt6i_uncached)) ip6_rt_put(rt); } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg); int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { int err = 0; if (f6i && f6i->fib6_prefsrc.plen) { *saddr = f6i->fib6_prefsrc.addr; } else { struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); } return err; } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); /* * support functions for ND * */ struct fib6_info *rt6_get_dflt_router(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned char nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); const struct rt6_info *rt6 = NULL; if (dst) rt6 = container_of(dst, struct rt6_info, dst); return rt6; } /* * Store a destination cache entry in a socket */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif } void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, const struct flowi6 *fl6); static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); return rt->rt6i_flags & RTF_LOCAL; } static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(skb_dst(skb)->dev->mtu); mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } else mtu = dst_mtu(skb_dst(skb)); return mtu; } static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, const struct in6_addr *daddr) { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; else if (unlikely(rt->rt6i_flags & RTF_CACHE)) return &rt->rt6i_dst.addr; else return daddr; } static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { struct fib6_nh *nha, *nhb; if (a->nh || b->nh) return nexthop_cmp(a->nh, b->nh); nha = a->fib6_nh; nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { struct inet6_dev *idev; unsigned int mtu; if (dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; } mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) mtu = idev->cnf.mtu6; rcu_read_unlock(); out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr); struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, const void *daddr); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> #include <linux/string.h> #include <uapi/linux/if_ether.h> struct bpf_prog; struct net; struct sk_buff; /** * struct flow_dissector_key_control: * @thoff: Transport header offset */ struct flow_dissector_key_control { u16 thoff; u16 addr_type; u32 flags; }; #define FLOW_DIS_IS_FRAGMENT BIT(0) #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, FLOW_DISSECT_RET_OUT_BAD, FLOW_DISSECT_RET_PROTO_AGAIN, FLOW_DISSECT_RET_IPPROTO_AGAIN, FLOW_DISSECT_RET_CONTINUE, }; /** * struct flow_dissector_key_basic: * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ struct flow_dissector_key_basic { __be16 n_proto; u8 ip_proto; u8 padding; }; struct flow_dissector_key_tags { u32 flow_label; }; struct flow_dissector_key_vlan { union { struct { u16 vlan_id:12, vlan_dei:1, vlan_priority:3; }; __be16 vlan_tci; }; __be16 vlan_tpid; }; struct flow_dissector_mpls_lse { u32 mpls_ttl:8, mpls_bos:1, mpls_tc:3, mpls_label:20; }; #define FLOW_DIS_MPLS_MAX 7 struct flow_dissector_key_mpls { struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */ u8 used_lses; /* One bit set for each Label Stack Entry in use */ }; static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls, int lse_index) { mpls->used_lses |= 1 << lse_index; } #define FLOW_DIS_TUN_OPTS_MAX 255 /** * struct flow_dissector_key_enc_opts: * @data: tunnel option data * @len: length of tunnel option data * @dst_opt_type: tunnel option type */ struct flow_dissector_key_enc_opts { u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired * here but seems difficult to #include */ u8 len; __be16 dst_opt_type; }; struct flow_dissector_key_keyid { __be32 keyid; }; /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; /** * struct flow_dissector_key_ipv6_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv6_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ struct in6_addr src; struct in6_addr dst; }; /** * struct flow_dissector_key_tipc: * @key: source node address combined with selector */ struct flow_dissector_key_tipc { __be32 key; }; /** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses * @v6addrs: IPv6 addresses */ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; struct flow_dissector_key_tipc tipckey; }; }; /** * flow_dissector_key_arp: * @ports: Operation, source and target addresses for an ARP header * for Ethernet hardware addresses and IPv4 protocol addresses * sip: Sender IP address * tip: Target IP address * op: Operation * sha: Sender hardware address * tpa: Target hardware address */ struct flow_dissector_key_arp { __u32 sip; __u32 tip; __u8 op; unsigned char sha[ETH_ALEN]; unsigned char tha[ETH_ALEN]; }; /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header * src: source port number * dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; struct { __be16 src; __be16 dst; }; }; }; /** * flow_dissector_key_icmp: * type: ICMP type * code: ICMP code * id: session identifier */ struct flow_dissector_key_icmp { struct { u8 type; u8 code; }; u16 id; }; /** * struct flow_dissector_key_eth_addrs: * @src: source Ethernet address * @dst: destination Ethernet address */ struct flow_dissector_key_eth_addrs { /* (dst,src) must be grouped, in the same way than in ETH header */ unsigned char dst[ETH_ALEN]; unsigned char src[ETH_ALEN]; }; /** * struct flow_dissector_key_tcp: * @flags: flags */ struct flow_dissector_key_tcp { __be16 flags; }; /** * struct flow_dissector_key_ip: * @tos: tos * @ttl: ttl */ struct flow_dissector_key_ip { __u8 tos; __u8 ttl; }; /** * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex * @ingress_iftype: ingress interface type */ struct flow_dissector_key_meta { int ingress_ifindex; u16 ingress_iftype; }; /** * struct flow_dissector_key_ct: * @ct_state: conntrack state after converting with map * @ct_mark: conttrack mark * @ct_zone: conntrack zone * @ct_labels: conntrack labels */ struct flow_dissector_key_ct { u16 ct_state; u16 ct_zone; u32 ct_mark; u32 ct_labels[4]; }; /** * struct flow_dissector_key_hash: * @hash: hash value */ struct flow_dissector_key_hash { u32 hash; }; enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_MAX, }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) struct flow_dissector_key { enum flow_dissector_key_id key_id; size_t offset; /* offset of struct flow_dissector_key_* in target the struct */ }; struct flow_dissector { unsigned int used_keys; /* each bit repesents presence of one key id */ unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; struct flow_keys_basic { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; }; struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_icmp icmp; /* 'addrs' must be the last member */ struct flow_dissector_key_addrs addrs; }; #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) __be32 flow_get_u32_src(const struct flow_keys *flow); __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_basic_dissector; /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a * larger "hash" of a flow to allow definitively matching specific flows where * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so * that it can be used in CB of skb (see sch_choke for an example). */ #define FLOW_KEYS_DIGEST_LEN 16 struct flow_keys_digest { u8 data[FLOW_KEYS_DIGEST_LEN]; }; void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } u32 flow_hash_from_keys(struct flow_keys *keys); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) { return flow_dissector->used_keys & (1 << key_id); } static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id, void *target_container) { return ((char *)target_container) + flow_dissector->offset[key_id]; } struct bpf_flow_dissector { struct bpf_flow_keys *flow_keys; const struct sk_buff *skb; void *data; void *data_end; }; static inline void flow_dissector_init_keys(struct flow_dissector_key_control *key_control, struct flow_dissector_key_basic *key_basic) { memset(key_control, 0, sizeof(*key_control)); memset(key_basic, 0, sizeof(*key_basic)); } #ifdef CONFIG_BPF_SYSCALL int flow_dissector_bpf_prog_attach_check(struct net *net, struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_VMACACHE_H #define __LINUX_VMACACHE_H #include <linux/sched.h> #include <linux/mm.h> static inline void vmacache_flush(struct task_struct *tsk) { memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr); #ifndef CONFIG_MMU extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, unsigned long start, unsigned long end); #endif static inline void vmacache_invalidate(struct mm_struct *mm) { mm->vmacache_seqnum++; } #endif /* __LINUX_VMACACHE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM sock #if !defined(_TRACE_SOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SOCK_H #include <net/sock.h> #include <net/ipv6.h> #include <linux/tracepoint.h> #include <linux/ipv6.h> #include <linux/tcp.h> #define family_names \ EM(AF_INET) \ EMe(AF_INET6) /* The protocol traced by inet_sock_set_state */ #define inet_protocol_names \ EM(IPPROTO_TCP) \ EM(IPPROTO_DCCP) \ EM(IPPROTO_SCTP) \ EMe(IPPROTO_MPTCP) #define tcp_state_names \ EM(TCP_ESTABLISHED) \ EM(TCP_SYN_SENT) \ EM(TCP_SYN_RECV) \ EM(TCP_FIN_WAIT1) \ EM(TCP_FIN_WAIT2) \ EM(TCP_TIME_WAIT) \ EM(TCP_CLOSE) \ EM(TCP_CLOSE_WAIT) \ EM(TCP_LAST_ACK) \ EM(TCP_LISTEN) \ EM(TCP_CLOSING) \ EMe(TCP_NEW_SYN_RECV) #define skmem_kind_names \ EM(SK_MEM_SEND) \ EMe(SK_MEM_RECV) /* enums need to be exported to user space */ #undef EM #undef EMe #define EM(a) TRACE_DEFINE_ENUM(a); #define EMe(a) TRACE_DEFINE_ENUM(a); family_names inet_protocol_names tcp_state_names skmem_kind_names #undef EM #undef EMe #define EM(a) { a, #a }, #define EMe(a) { a, #a } #define show_family_name(val) \ __print_symbolic(val, family_names) #define show_inet_protocol_name(val) \ __print_symbolic(val, inet_protocol_names) #define show_tcp_state_name(val) \ __print_symbolic(val, tcp_state_names) #define show_skmem_kind_names(val) \ __print_symbolic(val, skmem_kind_names) TRACE_EVENT(sock_rcvqueue_full, TP_PROTO(struct sock *sk, struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(int, rmem_alloc) __field(unsigned int, truesize) __field(int, sk_rcvbuf) ), TP_fast_assign( __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->truesize = skb->truesize; __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf); ), TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d", __entry->rmem_alloc, __entry->truesize, __entry->sk_rcvbuf) ); TRACE_EVENT(sock_exceed_buf_limit, TP_PROTO(struct sock *sk, struct proto *prot, long allocated, int kind), TP_ARGS(sk, prot, allocated, kind), TP_STRUCT__entry( __array(char, name, 32) __field(long *, sysctl_mem) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) __field(int, sysctl_wmem) __field(int, wmem_alloc) __field(int, wmem_queued) __field(int, kind) ), TP_fast_assign( strncpy(__entry->name, prot->name, 32); __entry->sysctl_mem = prot->sysctl_mem; __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->sysctl_wmem = sk_get_wmem0(sk, prot); __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc); __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued); __entry->kind = kind; ), TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld sysctl_rmem=%d rmem_alloc=%d sysctl_wmem=%d wmem_alloc=%d wmem_queued=%d kind=%s", __entry->name, __entry->sysctl_mem[0], __entry->sysctl_mem[1], __entry->sysctl_mem[2], __entry->allocated, __entry->sysctl_rmem, __entry->rmem_alloc, __entry->sysctl_wmem, __entry->wmem_alloc, __entry->wmem_queued, show_skmem_kind_names(__entry->kind) ) ); TRACE_EVENT(inet_sock_set_state, TP_PROTO(const struct sock *sk, const int oldstate, const int newstate), TP_ARGS(sk, oldstate, newstate), TP_STRUCT__entry( __field(const void *, skaddr) __field(int, oldstate) __field(int, newstate) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u16, protocol) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; __entry->oldstate = oldstate; __entry->newstate = newstate; __entry->family = sk->sk_family; __entry->protocol = sk->sk_protocol; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { pin6 = (struct in6_addr *)__entry->saddr_v6; *pin6 = sk->sk_v6_rcv_saddr; pin6 = (struct in6_addr *)__entry->daddr_v6; *pin6 = sk->sk_v6_daddr; } else #endif { pin6 = (struct in6_addr *)__entry->saddr_v6; ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); pin6 = (struct in6_addr *)__entry->daddr_v6; ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); } ), TP_printk("family=%s protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", show_family_name(__entry->family), show_inet_protocol_name(__entry->protocol), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->oldstate), show_tcp_state_name(__entry->newstate)) ); #endif /* _TRACE_SOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ #include <linux/rhashtable-types.h> #include <linux/completion.h> /* Per netns frag queues directory */ struct fqdir { /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; struct net *net; bool dead; struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; struct work_struct destroy_work; }; /** * fragment queue flags * * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), INET_FRAG_HASH_DEAD = BIT(3), }; struct frag_v4_compare_key { __be32 saddr; __be32 daddr; u32 user; u32 vif; __be16 id; u16 protocol; }; struct frag_v6_compare_key { struct in6_addr saddr; struct in6_addr daddr; u32 user; __be32 id; u32 iif; }; /** * struct inet_frag_queue - fragment queue * * @node: rhash node * @key: keys identifying this frag. * @timer: queue expiration timer * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @rb_fragments: received fragments rb-tree root * @fragments_tail: received fragments tail * @last_run_head: the head of the last "run". see ip_fragment.c * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { struct rhash_head node; union { struct frag_v4_compare_key v4; struct frag_v6_compare_key v6; } key; struct timer_list timer; spinlock_t lock; refcount_t refcnt; struct rb_root rb_fragments; struct sk_buff *fragments_tail; struct sk_buff *last_run_head; ktime_t stamp; int len; int meat; __u8 flags; u16 max_size; struct fqdir *fqdir; struct rcu_head rcu; }; struct inet_frags { unsigned int qsize; void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; struct rhashtable_params rhash_params; refcount_t refcnt; struct completion completion; }; int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { fqdir->high_thresh = 0; /* prevent creation of new frags */ fqdir->dead = true; } void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ unsigned int inet_frag_rbtree_purge(struct rb_root *root); static inline void inet_frag_put(struct inet_frag_queue *q) { if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q); } /* Memory Tracking Functions. */ static inline long frag_mem_limit(const struct fqdir *fqdir) { return atomic_long_read(&fqdir->mem); } static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val) { atomic_long_sub(val, &fqdir->mem); } static inline void add_frag_mem_limit(struct fqdir *fqdir, long val) { atomic_long_add(val, &fqdir->mem); } /* RFC 3168 support : * We want to check ECN values of all fragments, do detect invalid combinations. * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. */ #define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ #define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ #define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ #define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ extern const u8 ip_frag_ecn_table[16]; /* Return values of inet_frag_queue_insert() */ #define IPFRAG_OK 0 #define IPFRAG_DUP 1 #define IPFRAG_OVERLAP 2 int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, int offset, int end); void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 /* SPDX-License-Identifier: GPL-2.0 */ /* * Block data types and constants. Directly include this file only to * break include dependency loop. */ #ifndef __LINUX_BLK_TYPES_H #define __LINUX_BLK_TYPES_H #include <linux/types.h> #include <linux/bvec.h> #include <linux/ktime.h> struct bio_set; struct bio; struct bio_integrity_payload; struct page; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; void * bd_holder; int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; /* The counter of freeze processes */ int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; } __randomize_layout; /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. */ #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) typedef u32 __bitwise blk_status_t; #else typedef u8 __bitwise blk_status_t; #endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) #define BLK_STS_TIMEOUT ((__force blk_status_t)2) #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) #define BLK_STS_NEXUS ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) #define BLK_STS_IOERR ((__force blk_status_t)10) /* hack for device mapper, don't use elsewhere: */ #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) #define BLK_STS_AGAIN ((__force blk_status_t)12) /* * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if * device related resources are unavailable, but the driver can guarantee * that the queue will be rerun in the future once resources become * available again. This is typically the case for device specific * resources that are consumed for IO. If the driver fails allocating these * resources, we know that inflight (or pending) IO will free these * resource upon completion. * * This is different from BLK_STS_RESOURCE in that it explicitly references * a device specific resource. For resources of wider scope, allocation * failure can happen without having pending IO. This means that we can't * rely on request completions freeing these resources, as IO may not be in * flight. Examples of that are kernel memory allocations, DMA mappings, or * any other system wide resources. */ #define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) /* * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone * related resources are unavailable, but the driver can guarantee the queue * will be rerun in the future once the resources become available again. * * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references * a zone specific resource and IO to a different zone on the same device could * still be served. Examples of that are zones that are write-locked, but a read * to the same zone could be served. */ #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) /* * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion * path if the device returns a status indicating that too many zone resources * are currently open. The same command should be successful if resubmitted * after the number of open zones decreases below the device's limits, which is * reported in the request_queue's max_open_zones. */ #define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) /* * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion * path if the device returns a status indicating that too many zone resources * are currently active. The same command should be successful if resubmitted * after the number of active zones decreases below the device's limits, which * is reported in the request_queue's max_active_zones. */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with * * Description: * This classifies block error status into non-retryable errors and ones * that may be successful if retried on a failover path. * * Return: * %false - retrying failover path will not help * %true - may succeed if retried */ static inline bool blk_path_error(blk_status_t error) { switch (error) { case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: case BLK_STS_NEXUS: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; } /* Anything else could be a path failure, so should be retried */ return true; } /* * From most significant bit: * 1 bit: reserved for other usage, see below * 12 bits: original size of bio * 51 bits: issue time of bio */ #define BIO_ISSUE_RES_BITS 1 #define BIO_ISSUE_SIZE_BITS 12 #define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) #define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) #define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) #define BIO_ISSUE_SIZE_MASK \ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) #define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) /* Reserved bit for blk-throtl */ #define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) struct bio_issue { u64 value; }; static inline u64 __bio_issue_time(u64 time) { return time & BIO_ISSUE_TIME_MASK; } static inline u64 bio_issue_time(struct bio_issue *issue) { return __bio_issue_time(issue->value); } static inline sector_t bio_issue_size(struct bio_issue *issue) { return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); } static inline void bio_issue_init(struct bio_issue *issue, sector_t size) { size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) */ struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. */ unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; blk_status_t bi_status; u8 bi_partno; atomic_t __bi_remaining; struct bvec_iter bi_iter; bio_end_io_t *bi_end_io; void *bi_private; #ifdef CONFIG_BLK_CGROUP /* * Represents the association of the css and request_queue for the bio. * If a bio goes direct to device, it will not have a blkg as it will * not have a request_queue associated with it. The reference is put * on release of the bio. */ struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #ifdef CONFIG_BLK_CGROUP_IOCOST u64 bi_iocost_cost; #endif #endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *bi_crypt_context; #endif union { #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif }; unsigned short bi_vcnt; /* how many bio_vec's */ /* * Everything starting with bi_max_vecs will be preserved by bio_reset() */ unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ struct bio_set *bi_pool; /* * We can inline a number of vecs at the end of the bio, to avoid * double allocations for a small number of bio_vecs. This member * MUST obviously be kept at the very end of the bio. */ struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) /* * bio flags */ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ BIO_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_TRACKED, /* set if bio goes through the rq_qos path */ BIO_FLAG_LAST }; /* See BVEC_POOL_OFFSET below before adding new flags */ /* * We support 6 different bvec pools, the last one is magic in that it * is backed by a mempool. */ #define BVEC_POOL_NR 6 #define BVEC_POOL_MAX (BVEC_POOL_NR - 1) /* * Top 3 bits of bio flags indicate the pool the bvecs came from. We add * 1 to the actual index so that 0 indicates that there are no bvecs to be * freed. */ #define BVEC_POOL_BITS (3) #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) #if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1) # error "BVEC_POOL_BITS is too small" #endif /* * Flags starting here get preserved by bio_reset() - this includes * only BVEC_POOL_IDX() */ #define BIO_RESET_BITS BVEC_POOL_OFFSET typedef __u32 __bitwise blk_mq_req_flags_t; /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. * * The least significant bit of the operation number indicates the data * transfer direction: * * - if the least significant bit is set transfers are TO the device * - if the least significant bit is not set transfers are FROM the device * * If a operation does not transfer data the least significant bit has no * meaning. */ #define REQ_OP_BITS 8 #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 enum req_opf { /* read sectors from the device */ REQ_OP_READ = 0, /* write sectors to the device */ REQ_OP_WRITE = 1, /* flush the volatile write cache */ REQ_OP_FLUSH = 2, /* discard sectors */ REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ REQ_OP_ZONE_OPEN = 10, /* Close a zone */ REQ_OP_ZONE_CLOSE = 11, /* Transition a zone to full */ REQ_OP_ZONE_FINISH = 12, /* write data at the current zone write pointer */ REQ_OP_ZONE_APPEND = 13, /* reset a zone write pointer */ REQ_OP_ZONE_RESET = 15, /* reset all the zone present on the device */ REQ_OP_ZONE_RESET_ALL = 17, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, REQ_OP_SCSI_OUT = 33, /* Driver private requests */ REQ_OP_DRV_IN = 34, REQ_OP_DRV_OUT = 35, REQ_OP_LAST, }; enum req_flag_bits { __REQ_FAILFAST_DEV = /* no driver retries of device errors */ REQ_OP_BITS, __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_IDLE, /* anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ __REQ_NOWAIT, /* Don't wait if request will block */ /* * When a shared kthread needs to issue a bio for a cgroup, doing * so synchronously can lead to priority inversions as the kthread * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes * submit_bio() punt the actual issuing to a dedicated per-blkcg * work item to avoid such priority inversions. */ __REQ_CGROUP_PUNT, /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ __REQ_HIPRI, /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ __REQ_NR_BITS, /* stops here */ }; #define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) #define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) #define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1ULL << __REQ_SYNC) #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) #define REQ_IDLE (1ULL << __REQ_IDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) #define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) enum stat_group { STAT_READ, STAT_WRITE, STAT_DISCARD, STAT_FLUSH, NR_STAT_GROUPS }; #define bio_op(bio) \ ((bio)->bi_opf & REQ_OP_MASK) #define req_op(req) \ ((req)->cmd_flags & REQ_OP_MASK) /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, unsigned op_flags) { bio->bi_opf = op | op_flags; } static inline bool op_is_write(unsigned int op) { return (op & 1); } /* * Check if the bio or request is one that needs special treatment in the * flush state machine. */ static inline bool op_is_flush(unsigned int op) { return op & (REQ_FUA | REQ_PREFLUSH); } /* * Reads are always treated as synchronous, as are requests with the FUA or * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. */ static inline bool op_is_sync(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } static inline bool op_is_discard(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } /* * Check if a bio or request operation is a zone management operation, with * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case * due to its different handling in the block layer and device response in * case of command failure. */ static inline bool op_is_zone_mgmt(enum req_opf op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: return true; default: return false; } } static inline int op_stat_group(unsigned int op) { if (op_is_discard(op)) return STAT_DISCARD; return op_is_write(op); } typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U #define BLK_QC_T_SHIFT 16 #define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { return cookie != BLK_QC_T_NONE; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; } static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) { return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } static inline bool blk_qc_t_is_internal(blk_qc_t cookie) { return (cookie & BLK_QC_T_INTERNAL) != 0; } struct blk_rq_stat { u64 mean; u64 min; u64 max; u32 nr_samples; u64 batch; }; #endif /* __LINUX_BLK_TYPES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PARAVIRT_H #define _ASM_X86_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> #include <asm/nospec-branch.h> #include <asm/paravirt_types.h> #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> #include <asm/frame.h> static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, time.sched_clock); } struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; __visible void __native_queued_spin_unlock(struct qspinlock *lock); bool pv_is_native_spin_unlock(void); __visible bool __native_vcpu_is_preempted(long cpu); bool pv_is_native_vcpu_is_preempted(void); static inline u64 paravirt_steal_clock(int cpu) { return PVOP_CALL1(u64, time.steal_clock, cpu); } /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { pv_ops.cpu.io_delay(); #ifdef REALLY_SLOW_IO pv_ops.cpu.io_delay(); pv_ops.cpu.io_delay(); pv_ops.cpu.io_delay(); #endif } void native_flush_tlb_local(void); void native_flush_tlb_global(void); void native_flush_tlb_one_user(unsigned long addr); void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info); static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } static inline void __flush_tlb_global(void) { PVOP_VCALL0(mmu.flush_tlb_kernel); } static inline void __flush_tlb_one_user(unsigned long addr) { PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } static inline void __flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) { PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); } static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); } static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { PVOP_VCALL1(mmu.exit_mmap, mm); } #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { PVOP_VCALL1(cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); } /* * These special macros can be used to get or set a debugging register */ static inline unsigned long paravirt_get_debugreg(int reg) { return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static inline void set_debugreg(unsigned long val, int reg) { PVOP_VCALL2(cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { return PVOP_CALL0(unsigned long, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { PVOP_VCALL1(cpu.write_cr0, x); } static inline unsigned long read_cr2(void) { return PVOP_CALLEE0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) { PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { return PVOP_CALL0(unsigned long, mmu.read_cr3); } static inline void write_cr3(unsigned long x) { PVOP_VCALL1(mmu.write_cr3, x); } static inline void __write_cr4(unsigned long x) { PVOP_VCALL1(cpu.write_cr4, x); } static inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); } static inline void halt(void) { PVOP_VCALL0(irq.halt); } static inline void wbinvd(void) { PVOP_VCALL0(cpu.wbinvd); } static inline u64 paravirt_read_msr(unsigned msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); } static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { PVOP_VCALL3(cpu.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); } static inline int paravirt_write_msr_safe(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); } #define rdmsr(msr, val1, val2) \ do { \ u64 _l = paravirt_read_msr(msr); \ val1 = (u32)_l; \ val2 = _l >> 32; \ } while (0) #define wrmsr(msr, val1, val2) \ do { \ paravirt_write_msr(msr, val1, val2); \ } while (0) #define rdmsrl(msr, val) \ do { \ val = paravirt_read_msr(msr); \ } while (0) static inline void wrmsrl(unsigned msr, u64 val) { wrmsr(msr, (u32)val, (u32)(val>>32)); } #define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) /* rdmsr with exception handling */ #define rdmsr_safe(msr, a, b) \ ({ \ int _err; \ u64 _l = paravirt_read_msr_safe(msr, &_err); \ (*a) = (u32)_l; \ (*b) = _l >> 32; \ _err; \ }) static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; *p = paravirt_read_msr_safe(msr, &err); return err; } static inline unsigned long long paravirt_read_pmc(int counter) { return PVOP_CALL1(u64, cpu.read_pmc, counter); } #define rdpmc(counter, low, high) \ do { \ u64 _l = paravirt_read_pmc(counter); \ low = (u32)_l; \ high = _l >> 32; \ } while (0) #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.free_ldt, ldt, entries); } static inline void load_TR_desc(void) { PVOP_VCALL0(cpu.load_tr_desc); } static inline void load_gdt(const struct desc_ptr *dtr) { PVOP_VCALL1(cpu.load_gdt, dtr); } static inline void load_idt(const struct desc_ptr *dtr) { PVOP_VCALL1(cpu.load_idt, dtr); } static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(cpu.set_ldt, addr, entries); } static inline unsigned long paravirt_store_tr(void) { return PVOP_CALL0(unsigned long, cpu.store_tr); } #define store_tr(tr) ((tr) = paravirt_store_tr()) static inline void load_TLS(struct thread_struct *t, unsigned cpu) { PVOP_VCALL2(cpu.load_tls, t, cpu); } static inline void load_gs_index(unsigned int gs) { PVOP_VCALL1(cpu.load_gs_index, gs); } static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); } static inline void write_gdt_entry(struct desc_struct *dt, int entry, void *desc, int type) { PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); } static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } #ifdef CONFIG_X86_IOPL_IOPERM static inline void tss_invalidate_io_bitmap(void) { PVOP_VCALL0(cpu.invalidate_io_bitmap); } static inline void tss_update_io_bitmap(void) { PVOP_VCALL0(cpu.update_io_bitmap); } #endif static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { PVOP_VCALL2(mmu.activate_mm, prev, next); } static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) { return PVOP_CALL1(int, mmu.pgd_alloc, mm); } static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) { PVOP_VCALL2(mmu.pgd_free, mm, pgd); } static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pte, mm, pfn); } static inline void paravirt_release_pte(unsigned long pfn) { PVOP_VCALL1(mmu.release_pte, pfn); } static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); } static inline void paravirt_release_pmd(unsigned long pfn) { PVOP_VCALL1(mmu.release_pmd, pfn); } static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pud, mm, pfn); } static inline void paravirt_release_pud(unsigned long pfn) { PVOP_VCALL1(mmu.release_pud, pfn); } static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); } static inline void paravirt_release_p4d(unsigned long pfn) { PVOP_VCALL1(mmu.release_p4d, pfn); } static inline pte_t __pte(pteval_t val) { return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) }; } static inline pteval_t pte_val(pte_t pte) { return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); } static inline pgd_t __pgd(pgdval_t val) { return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) }; } static inline pgdval_t pgd_val(pgd_t pgd) { return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pteval_t ret; ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); return (pte_t) { .pte = ret }; } static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); } static inline void set_pte(pte_t *ptep, pte_t pte) { PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); } static inline pmd_t __pmd(pmdval_t val) { return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) }; } static inline pmdval_t pmd_val(pmd_t pmd) { return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); } static inline void set_pud(pud_t *pudp, pud_t pud) { PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); } static inline pud_t __pud(pudval_t val) { pudval_t ret; ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); } static inline void pud_clear(pud_t *pudp) { set_pud(pudp, native_make_pud(0)); } static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { p4dval_t val = native_p4d_val(p4d); PVOP_VCALL2(mmu.set_p4d, p4dp, val); } #if CONFIG_PGTABLE_LEVELS >= 5 static inline p4d_t __p4d(p4dval_t val) { p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); } #define set_pgd(pgdp, pgdval) do { \ if (pgtable_l5_enabled()) \ __set_pgd(pgdp, pgdval); \ else \ set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ } while (0) #define pgd_clear(pgdp) do { \ if (pgtable_l5_enabled()) \ set_pgd(pgdp, native_make_pgd(0)); \ } while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ static inline void p4d_clear(p4d_t *p4dp) { set_p4d(p4dp, native_make_p4d(0)); } static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { set_pte(ptep, pte); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { set_pte(ptep, native_make_pte(0)); } static inline void pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, native_make_pmd(0)); } #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { PVOP_VCALL1(cpu.start_context_switch, prev); } static inline void arch_end_context_switch(struct task_struct *next) { PVOP_VCALL1(cpu.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.flush); } static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { pv_ops.mmu.set_fixmap(idx, phys, flags); } #endif #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); } static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { PVOP_VCALLEE1(lock.queued_spin_unlock, lock); } static __always_inline void pv_wait(u8 *ptr, u8 val) { PVOP_VCALL2(lock.wait, ptr, val); } static __always_inline void pv_kick(int cpu) { PVOP_VCALL1(lock.kick, cpu); } static __always_inline bool pv_vcpu_is_preempted(long cpu) { return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); } void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); bool __raw_callee_save___native_vcpu_is_preempted(long cpu); #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" #define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" #else /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS \ "push %rcx;" \ "push %rdx;" \ "push %rsi;" \ "push %rdi;" \ "push %r8;" \ "push %r9;" \ "push %r10;" \ "push %r11;" #define PV_RESTORE_ALL_CALLER_REGS \ "pop %r11;" \ "pop %r10;" \ "pop %r9;" \ "pop %r8;" \ "pop %rdi;" \ "pop %rsi;" \ "pop %rdx;" \ "pop %rcx;" #endif /* * Generate a thunk around a function which saves all caller-save * registers except for the return value. This allows C functions to * be called from assembler code where fewer than normal registers are * available. It may also help code generation around calls from C * code if the common case doesn't use many registers. * * When a callee is wrapped in a thunk, the caller can assume that all * arg regs and all scratch registers are preserved across the * call. The return value in rax/eax will not be saved, even for void * functions. */ #define PV_THUNK_NAME(func) "__raw_callee_save_" #func #define PV_CALLEE_SAVE_REGS_THUNK(func) \ extern typeof(func) __raw_callee_save_##func; \ \ asm(".pushsection .text;" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ "ret;" \ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") /* Get a reference to a callee-save function */ #define PV_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { __raw_callee_save_##func }) /* Promise that "func" already uses the right calling convention */ #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) #ifdef CONFIG_PARAVIRT_XXL static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, irq.save_fl); } static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(irq.restore_fl, f); } static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(irq.irq_disable); } static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(irq.irq_enable); } static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; f = arch_local_save_flags(); arch_local_irq_disable(); return f; } #endif /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef __PVOP_CALL #undef __PVOP_VCALL #undef PVOP_VCALL0 #undef PVOP_CALL0 #undef PVOP_VCALL1 #undef PVOP_CALL1 #undef PVOP_VCALL2 #undef PVOP_CALL2 #undef PVOP_VCALL3 #undef PVOP_CALL3 #undef PVOP_VCALL4 #undef PVOP_CALL4 extern void default_banner(void); #else /* __ASSEMBLY__ */ #define _PVSITE(ptype, ops, word, algn) \ 771:; \ ops; \ 772:; \ .pushsection .parainstructions,"a"; \ .align algn; \ word 771b; \ .byte ptype; \ .byte 772b-771b; \ .popsection #define COND_PUSH(set, mask, reg) \ .if ((~(set)) & mask); push %reg; .endif #define COND_POP(set, mask, reg) \ .if ((~(set)) & mask); pop %reg; .endif #ifdef CONFIG_X86_64 #define PV_SAVE_REGS(set) \ COND_PUSH(set, CLBR_RAX, rax); \ COND_PUSH(set, CLBR_RCX, rcx); \ COND_PUSH(set, CLBR_RDX, rdx); \ COND_PUSH(set, CLBR_RSI, rsi); \ COND_PUSH(set, CLBR_RDI, rdi); \ COND_PUSH(set, CLBR_R8, r8); \ COND_PUSH(set, CLBR_R9, r9); \ COND_PUSH(set, CLBR_R10, r10); \ COND_PUSH(set, CLBR_R11, r11) #define PV_RESTORE_REGS(set) \ COND_POP(set, CLBR_R11, r11); \ COND_POP(set, CLBR_R10, r10); \ COND_POP(set, CLBR_R9, r9); \ COND_POP(set, CLBR_R8, r8); \ COND_POP(set, CLBR_RDI, rdi); \ COND_POP(set, CLBR_RSI, rsi); \ COND_POP(set, CLBR_RDX, rdx); \ COND_POP(set, CLBR_RCX, rcx); \ COND_POP(set, CLBR_RAX, rax) #define PARA_PATCH(off) ((off) / 8) #define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else #define PV_SAVE_REGS(set) \ COND_PUSH(set, CLBR_EAX, eax); \ COND_PUSH(set, CLBR_EDI, edi); \ COND_PUSH(set, CLBR_ECX, ecx); \ COND_PUSH(set, CLBR_EDX, edx) #define PV_RESTORE_REGS(set) \ COND_POP(set, CLBR_EDX, edx); \ COND_POP(set, CLBR_ECX, ecx); \ COND_POP(set, CLBR_EDI, edi); \ COND_POP(set, CLBR_EAX, eax) #define PARA_PATCH(off) ((off) / 4) #define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr #endif #ifdef CONFIG_PARAVIRT_XXL #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(PV_CPU_iret), \ ANNOTATE_RETPOLINE_SAFE; \ jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL /* * If swapgs is used while the userspace stack is still current, * there's no way to call a pvop. The PV replacement *must* be * inlined, or the swapgs instruction must be trapped and emulated. */ #define SWAPGS_UNSAFE_STACK \ PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) /* * Note: swapgs is very special, and in practise is either going to be * implemented with a single "swapgs" instruction or something very * special. Either way, we don't need to save any registers for * it. */ #define SWAPGS \ PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ ) #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ ANNOTATE_RETPOLINE_SAFE; \ jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ #ifdef CONFIG_PARAVIRT_XXL #define GET_CR2_INTO_AX \ PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \ ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \ ) #endif /* CONFIG_PARAVIRT_XXL */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop #endif /* !CONFIG_PARAVIRT */ #ifndef __ASSEMBLY__ #ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { } #endif #ifndef CONFIG_PARAVIRT static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } #endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PARAVIRT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM exceptions #if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGE_FAULT_H #include <linux/tracepoint.h> #include <asm/trace/common.h> extern int trace_pagefault_reg(void); extern void trace_pagefault_unreg(void); DECLARE_EVENT_CLASS(x86_exceptions, TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), TP_ARGS(address, regs, error_code), TP_STRUCT__entry( __field( unsigned long, address ) __field( unsigned long, ip ) __field( unsigned long, error_code ) ), TP_fast_assign( __entry->address = address; __entry->ip = regs->ip; __entry->error_code = error_code; ), TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); #define DEFINE_PAGE_FAULT_EVENT(name) \ DEFINE_EVENT_FN(x86_exceptions, name, \ TP_PROTO(unsigned long address, struct pt_regs *regs, \ unsigned long error_code), \ TP_ARGS(address, regs, error_code), \ trace_pagefault_reg, trace_pagefault_unreg); DEFINE_PAGE_FAULT_EVENT(page_fault_user); DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); #undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE exceptions #endif /* _TRACE_PAGE_FAULT_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_SCHED_H #define BLK_MQ_SCHED_H #include "blk-mq.h" #include "blk-mq-tag.h" void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async); void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list, bool run_queue_async); void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_requests(struct request_queue *q); static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; return __blk_mq_sched_bio_merge(q, bio, nr_segs); } static inline bool blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) { struct elevator_queue *e = q->elevator; if (e && e->type->ops.allow_merge) return e->type->ops.allow_merge(q, rq, bio); return true; } static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; if (e && e->type->ops.completed_request) e->type->ops.completed_request(rq, now); } static inline void blk_mq_sched_requeue_request(struct request *rq) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) e->type->ops.requeue_request(rq); } static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) { struct elevator_queue *e = hctx->queue->elevator; if (e && e->type->ops.has_work) return e->type->ops.has_work(hctx); return false; } static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov */ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/xarray.h> #include <linux/local_lock.h> /* Keep unconverted code working */ #define radix_tree_root xarray #define radix_tree_node xa_node struct radix_tree_preload { local_lock_t lock; unsigned nr; /* nodes->parent points to next preallocated node */ struct radix_tree_node *nodes; }; DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads); /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: * * 00 - data pointer * 10 - internal entry * x1 - value entry * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved * to another location in the tree and the lookup should be restarted. While * NULL fits the 'data pointer' pattern, it means that there is no entry in * the tree for this index (no matter what level of the tree it is found at). * This means that storing a NULL entry in the tree is the same as deleting * the entry from the tree. */ #define RADIX_TREE_ENTRY_MASK 3UL #define RADIX_TREE_INTERNAL_NODE 2UL static inline bool radix_tree_is_internal_node(void *ptr) { return ((unsigned long)ptr & RADIX_TREE_ENTRY_MASK) == RADIX_TREE_INTERNAL_NODE; } /*** radix-tree API starts here ***/ #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) #define RADIX_TREE_MAX_TAGS XA_MAX_MARKS #define RADIX_TREE_TAG_LONGS XA_MARK_LONGS #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) /* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) /* The top bits of xa_flags are used to store the root tags */ #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) #define RADIX_TREE_INIT(name, mask) XARRAY_INIT(name, mask) #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(name, mask) #define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask) static inline bool radix_tree_empty(const struct radix_tree_root *root) { return root->xa_head == NULL; } /** * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating * @node: node that contains current slot * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is * described by a pointer to its first slot and a struct radix_tree_iter * which holds the chunk's position in the tree and its size. For tagged * iteration radix_tree_iter also holds the slots' bit-mask for one chosen * radix tree tag. */ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; struct radix_tree_node *node; }; /** * Radix-tree synchronization * * The radix-tree API requires that users provide all synchronisation (with * specific exceptions, noted below). * * Synchronization of access to the data items being stored in the tree, and * management of their lifetimes must be completely managed by API users. * * For API usage, in general, * - any function _modifying_ the tree or tags (inserting or deleting * items, setting or clearing tags) must exclude other modifications, and * exclude any functions reading the tree. * - any function _reading_ the tree or tags (looking up items or tags, * gang lookups) must exclude modifications to the tree, but may occur * concurrently with other readers. * * The notable exceptions to this rule are the following functions: * __radix_tree_lookup * radix_tree_lookup * radix_tree_lookup_slot * radix_tree_tag_get * radix_tree_gang_lookup * radix_tree_gang_lookup_tag * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * * The first 7 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. * * It is still required that the caller manage the synchronization and lifetimes * of the items. So if RCU lock-free lookups are used, typically this would mean * that the items have their own locks, or are amenable to lock-free access; and * that the items are freed by RCU (or only freed after having been deleted from * the radix tree *and* a synchronize_rcu() grace period). * * (Note, rcu_assign_pointer and rcu_dereference are not needed to control * access to data items when inserting into or looking up from the radix tree) * * Note that the value returned by radix_tree_tag_get() may not be relied upon * if only the RCU read lock is held. Functions to set/clear tags and to * delete nodes running concurrently with it may affect its result such that * two consecutive reads in the same locked section may return different * values. If reliability is required, modification functions must also be * excluded from concurrency. * * radix_tree_tagged is able to be called without locking or RCU. */ /** * radix_tree_deref_slot - dereference a slot * @slot: slot pointer, returned by radix_tree_lookup_slot * * For use with radix_tree_lookup_slot(). Caller must hold tree at least read * locked across slot lookup and dereference. Not required if write lock is * held (ie. items cannot be concurrently inserted). * * radix_tree_deref_retry must be used to confirm validity of the pointer if * only the read lock is held. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot(void __rcu **slot) { return rcu_dereference(*slot); } /** * radix_tree_deref_slot_protected - dereference a slot with tree lock held * @slot: slot pointer, returned by radix_tree_lookup_slot * * Similar to radix_tree_deref_slot. The caller does not hold the RCU read * lock but it must hold the tree lock to prevent parallel updates. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot_protected(void __rcu **slot, spinlock_t *treelock) { return rcu_dereference_protected(*slot, lockdep_is_held(treelock)); } /** * radix_tree_deref_retry - check radix_tree_deref_slot * @arg: pointer returned by radix_tree_deref_slot * Returns: 0 if retry is not required, otherwise retry is required * * radix_tree_deref_retry must be used with radix_tree_deref_slot. */ static inline int radix_tree_deref_retry(void *arg) { return unlikely(radix_tree_is_internal_node(arg)); } /** * radix_tree_exception - radix_tree_deref_slot returned either exception? * @arg: value returned by radix_tree_deref_slot * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. */ static inline int radix_tree_exception(void *arg) { return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } int radix_tree_insert(struct radix_tree_root *, unsigned long index, void *); void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long index); void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, void __rcu **slot, void *entry); void radix_tree_iter_replace(struct radix_tree_root *, const struct radix_tree_iter *, void __rcu **slot, void *entry); void radix_tree_replace_slot(struct radix_tree_root *, void __rcu **slot, void *entry); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max); enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */ RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */ }; /** * radix_tree_iter_init - initialize radix tree iterator * * @iter: pointer to iterator state * @start: iteration starting index * Returns: NULL */ static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) { /* * Leave iter->tags uninitialized. radix_tree_next_chunk() will fill it * in the case of a successful tagged chunk lookup. If the lookup was * unsuccessful or non-tagged then nobody cares about ->tags. * * Set index to zero to bypass next_index overflow protection. * See the comment in radix_tree_next_chunk() for details. */ iter->index = 0; iter->next_index = start; return NULL; } /** * radix_tree_next_chunk - find next chunk of slots for iteration * * @root: radix tree root * @iter: iterator state * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if there no more left * * This function looks up the next chunk in the radix tree starting from * @iter->next_index. It returns a pointer to the chunk's first slot. * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** * radix_tree_iter_lookup - look up an index in the radix tree * @root: radix tree root * @iter: iterator state * @index: key to look up * * If @index is present in the radix tree, this function returns the slot * containing it and updates @iter to describe the entry. If @index is not * present, it returns NULL. */ static inline void __rcu ** radix_tree_iter_lookup(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); } /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state * * If we iterate over a tree protected only by the RCU lock, a race * against deletion or creation may result in seeing a slot for which * radix_tree_deref_retry() returns true. If so, call this function * and continue the iteration. */ static inline __must_check void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; iter->tags = 0; return NULL; } static inline unsigned long __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) { return iter->index + slots; } /** * radix_tree_iter_resume - resume iterating when the chunk may be invalid * @slot: pointer to current slot * @iter: iterator state * Returns: New slot pointer * * If the iterator needs to release then reacquire a lock, the chunk may * have been invalidated by an insertion or deletion. Call this function * before releasing the lock to continue the iteration from the next index. */ void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter); /** * radix_tree_chunk_size - get current chunk size * * @iter: pointer to radix tree iterator * Returns: current chunk size */ static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; } /** * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * * This function updates @iter->index in the case of a successful lookup. * For tagged lookup it also eats @iter->tags. * * There are several cases where 'slot' can be passed in as NULL to this * function. These cases result from the use of radix_tree_iter_resume() or * radix_tree_iter_retry(). In these cases we don't end up dereferencing * 'slot' because either: * a) we are doing tagged iteration and iter->tags has been set to 0, or * b) we are doing non-tagged iteration, and iter->index and iter->next_index * have been set up so that radix_tree_chunk_size() returns 1 or 0. */ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { iter->tags >>= 1; if (unlikely(!iter->tags)) return NULL; if (likely(iter->tags & 1ul)) { iter->index = __radix_tree_iter_add(iter, 1); slot++; goto found; } if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset++; iter->index = __radix_tree_iter_add(iter, offset); slot += offset; goto found; } } else { long count = radix_tree_chunk_size(iter); while (--count > 0) { slot++; iter->index = __radix_tree_iter_add(iter, 1); if (likely(*slot)) goto found; if (flags & RADIX_TREE_ITER_CONTIG) { /* forbid switching to the next chunk */ iter->next_index = 0; break; } } } return NULL; found: return slot; } /** * radix_tree_for_each_slot - iterate over non-empty slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_slot(slot, root, iter, start) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \ slot = radix_tree_next_slot(slot, iter, 0)) /** * radix_tree_for_each_tagged - iterate over tagged slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * @tag: tag index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_tagged(slot, root, iter, start, tag) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, \ RADIX_TREE_ITER_TAGGED | tag)) ; \ slot = radix_tree_next_slot(slot, iter, \ RADIX_TREE_ITER_TAGGED | tag)) #endif /* _LINUX_RADIX_TREE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_COMMON_H #define _INET_COMMON_H #include <linux/indirect_call_wrapper.h> extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; /* * INET4 prototypes used by INET6 */ struct msghdr; struct sock; struct sockaddr; struct socket; int inet_release(struct socket *sock); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern); int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); /* Don't allocate port at this moment, defer to connect. */ #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) /* Grab and release socket lock. */ #define BIND_WITH_LOCK (1 << 1) /* Called from BPF program. */ #define BIND_FROM_BPF (1 << 2) int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb); int inet_gro_complete(struct sk_buff *skb, int nhoff); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features); static inline void inet_ctl_sock_destroy(struct sock *sk) { if (sk) sock_release(sk->sk_socket); } #define indirect_call_gro_receive(f2, f1, cb, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ NAPI_GRO_CB(skb)->flush |= 1, NULL : \ INDIRECT_CALL_2(cb, f2, f1, head, skb); \ }) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SPECIAL_INSNS_H #define _ASM_X86_SPECIAL_INSNS_H #ifdef __KERNEL__ #include <asm/nops.h> #include <asm/processor-flags.h> #include <linux/irqflags.h> #include <linux/jump_label.h> /* * The compiler should not reorder volatile asm statements with respect to each * other: they should execute in program order. However GCC 4.9.x and 5.x have * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder * volatile asm. The write functions are not affected since they have memory * clobbers preventing reordering. To prevent reads from being reordered with * respect to writes, use a dummy memory operand. */ #define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL) void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { unsigned long val; asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static __always_inline unsigned long native_read_cr2(void) { unsigned long val; asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static __always_inline void native_write_cr2(unsigned long val) { asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } static inline unsigned long __native_read_cr3(void) { unsigned long val; asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static inline void native_write_cr3(unsigned long val) { asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } static inline unsigned long native_read_cr4(void) { unsigned long val; #ifdef CONFIG_X86_32 /* * This could fault if CR4 does not exist. Non-existent CR4 * is functionally equivalent to CR4 == 0. Keep it simple and pretend * that CR4 == 0 on CPUs that don't have CR4. */ asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) : "=r" (val) : "0" (0), __FORCE_ORDER); #else /* CR4 always exists on x86_64. */ asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER); #endif return val; } void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; /* * "rdpkru" instruction. Places PKRU contents in to EAX, * clears EDX and requires that ecx=0. */ asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; /* * "wrpkru" instruction. Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c"(ecx), "d"(edx)); } static inline void __write_pkru(u32 pkru) { /* * WRPKRU is relatively expensive compared to RDPKRU. * Avoid WRPKRU when it would not change the value. */ if (pkru == rdpkru()) return; wrpkru(pkru); } #else static inline u32 rdpkru(void) { return 0; } static inline void __write_pkru(u32 pkru) { } #endif static inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); } extern asmlinkage void asm_load_gs_index(unsigned int selector); static inline void native_load_gs_index(unsigned int selector) { unsigned long flags; local_irq_save(flags); asm_load_gs_index(selector); local_irq_restore(flags); } static inline unsigned long __read_cr4(void) { return native_read_cr4(); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else static inline unsigned long read_cr0(void) { return native_read_cr0(); } static inline void write_cr0(unsigned long x) { native_write_cr0(x); } static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } /* * Careful! CR3 contains more than just an address. You probably want * read_cr3_pa() instead. */ static inline unsigned long __read_cr3(void) { return __native_read_cr3(); } static inline void write_cr3(unsigned long x) { native_write_cr3(x); } static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } static inline void wbinvd(void) { native_wbinvd(); } #ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int selector) { native_load_gs_index(selector); } #endif #endif /* CONFIG_PARAVIRT_XXL */ static inline void clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } static inline void clflushopt(volatile void *__p) { alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", ".byte 0x66; clflush %P0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } static inline void clwb(volatile void *__p) { volatile struct { char x[64]; } *p = __p; asm volatile(ALTERNATIVE_2( ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ X86_FEATURE_CLFLUSHOPT, ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ X86_FEATURE_CLWB) : [p] "+m" (*p) : [pax] "a" (p)); } #define nop() asm volatile ("nop") static inline void serialize(void) { /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); } /* The dst parameter must be 64-bytes aligned */ static inline void movdir64b(void *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } *__dst = dst; /* * MOVDIR64B %(rdx), rax. * * Both __src and __dst must be memory constraints in order to tell the * compiler that no other memory accesses should be reordered around * this one. * * Also, both must be supplied as lvalues because this tells * the compiler what the object is (its size) the instruction accesses. * I.e., not the pointers but what they point to, thus the deref'ing '*'. */ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" : "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); } /** * enqcmds - Enqueue a command in supervisor (CPL0) mode * @dst: destination, in MMIO space (must be 512-bit aligned) * @src: 512 bits memory operand * * The ENQCMDS instruction allows software to write a 512-bit command to * a 512-bit-aligned special MMIO region that supports the instruction. * A return status is loaded into the ZF flag in the RFLAGS register. * ZF = 0 equates to success, and ZF = 1 indicates retry or error. * * This function issues the ENQCMDS instruction to submit data from * kernel space to MMIO space, in a unit of 512 bits. Order of data access * is not guaranteed, nor is a memory barrier performed afterwards. It * returns 0 on success and -EAGAIN on failure. * * Warning: Do not use this helper unless your driver has checked that the * ENQCMDS instruction is supported on the platform and the device accepts * ENQCMDS. */ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; bool zf; /* * ENQCMDS %(rdx), rax * * See movdir64b()'s comment on operand specification. */ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" CC_SET(z) : CC_OUT(z) (zf), "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); /* Submission failure is indicated via EFLAGS.ZF=1 */ if (zf) return -EAGAIN; return 0; } #endif /* __KERNEL__ */ #endif /* _ASM_X86_SPECIAL_INSNS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM signal #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SIGNAL_H #include <linux/signal.h> #include <linux/sched.h> #include <linux/tracepoint.h> #define TP_STORE_SIGINFO(__entry, info) \ do { \ if (info == SEND_SIG_NOINFO) { \ __entry->errno = 0; \ __entry->code = SI_USER; \ } else if (info == SEND_SIG_PRIV) { \ __entry->errno = 0; \ __entry->code = SI_KERNEL; \ } else { \ __entry->errno = info->si_errno; \ __entry->code = info->si_code; \ } \ } while (0) #ifndef TRACE_HEADER_MULTI_READ enum { TRACE_SIGNAL_DELIVERED, TRACE_SIGNAL_IGNORED, TRACE_SIGNAL_ALREADY_PENDING, TRACE_SIGNAL_OVERFLOW_FAIL, TRACE_SIGNAL_LOSE_INFO, }; #endif /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct * @group: shared or private * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, * 'info' is not a pointer and you can't access its field. Instead, * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV * means that si_code is SI_KERNEL. */ TRACE_EVENT(signal_generate, TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task, int group, int result), TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, group ) __field( int, result ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; __entry->group = group; __entry->result = result; ), TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, __entry->comm, __entry->pid, __entry->group, __entry->result) ); /** * signal_deliver - called when a signal is delivered * @sig: signal number * @info: pointer to struct siginfo * @ka: pointer to struct k_sigaction * * A 'sig' signal is delivered to current process with 'info' siginfo, * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or * SIG_DFL. * Note that some signals reported by signal_generate tracepoint can be * lost, ignored or modified (by debugger) before hitting this tracepoint. * This means, this can show which signals are actually delivered, but * matching generated signals and delivered signals may not be correct. */ TRACE_EVENT(signal_deliver, TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka), TP_ARGS(sig, info, ka), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __field( unsigned long, sa_handler ) __field( unsigned long, sa_flags ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); __entry->sa_handler = (unsigned long)ka->sa.sa_handler; __entry->sa_flags = ka->sa.sa_flags; ), TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/ptrace.c * * (C) Copyright 1999 Linus Torvalds * * Common interfaces for "ptrace()" which we do not want * to continually duplicate across every architecture. */ #include <linux/capability.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> #include <linux/uio.h> #include <linux/audit.h> #include <linux/pid_namespace.h> #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/regset.h> #include <linux/hw_breakpoint.h> #include <linux/cn_proc.h> #include <linux/compat.h> #include <linux/sched/signal.h> #include <asm/syscall.h> /* for syscall_get_* */ /* * Access another process' address space via ptrace. * Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; mm = get_task_mm(tsk); if (!mm) return 0; if (!tsk->ptrace || (current != tsk->parent) || ((get_dumpable(mm) != SUID_DUMP_USER) && !ptracer_capable(tsk, mm->user_ns))) { mmput(mm); return 0; } ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); mmput(mm); return ret; } void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred) { BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; child->ptracer_cred = get_cred(ptracer_cred); } /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { __ptrace_link(child, new_parent, current_cred()); } /** * __ptrace_unlink - unlink ptracee and restore its execution state * @child: ptracee to be unlinked * * Remove @child from the ptrace list, move it back to the original parent, * and restore the execution state so that it conforms to the group stop * state. * * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer * exiting. For PTRACE_DETACH, unless the ptracee has been killed between * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED. * If the ptracer is exiting, the ptracee can be in any state. * * After detach, the ptracee should be in a state which conforms to the * group stop. If the group is stopped or in the process of stopping, the * ptracee should be put into TASK_STOPPED; otherwise, it should be woken * up from TASK_TRACED. * * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED, * it goes through TRACED -> RUNNING -> STOPPED transition which is similar * to but in the opposite direction of what happens while attaching to a * stopped task. However, in this direction, the intermediate RUNNING * state is not hidden even from the current ptracer and if it immediately * re-attaches and performs a WNOHANG wait(2), it may fail. * * CONTEXT: * write_lock_irq(tasklist_lock) */ void __ptrace_unlink(struct task_struct *child) { const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif child->parent = child->real_parent; list_del_init(&child->ptrace_entry); old_cred = child->ptracer_cred; child->ptracer_cred = NULL; put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; /* * Clear all pending traps and TRAPPING. TRAPPING should be * cleared regardless of JOBCTL_STOP_PENDING. Do it explicitly. */ task_clear_jobctl_pending(child, JOBCTL_TRAP_MASK); task_clear_jobctl_trapping(child); /* * Reinstate JOBCTL_STOP_PENDING if group stop is in effect and * @child isn't dead. */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || child->signal->group_stop_count)) { child->jobctl |= JOBCTL_STOP_PENDING; /* * This is only possible if this thread was cloned by the * traced task running in the stopped group, set the signal * for the future reports. * FIXME: we should change ptrace_init_task() to handle this * case. */ if (!(child->jobctl & JOBCTL_STOP_SIGMASK)) child->jobctl |= SIGSTOP; } /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child * is in TASK_TRACED; otherwise, we might unduly disrupt * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } static bool looks_like_a_spurious_pid(struct task_struct *task) { if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP)) return false; if (task_pid_vnr(task) == task->ptrace_message) return false; /* * The tracee changed its pid but the PTRACE_EVENT_EXEC event * was not wait()'ed, most probably debugger targets the old * leader which was destroyed in de_thread(). */ return true; } /* Ensure that nothing can wake it up, even SIGKILL */ static bool ptrace_freeze_traced(struct task_struct *task) { bool ret = false; /* Lockless, nobody but us can set this flag */ if (task->jobctl & JOBCTL_LISTENING) return ret; spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { task->state = __TASK_TRACED; ret = true; } spin_unlock_irq(&task->sighand->siglock); return ret; } static void ptrace_unfreeze_traced(struct task_struct *task) { if (task->state != __TASK_TRACED) return; WARN_ON(!task->ptrace || task->parent != current); /* * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. * Recheck state under the lock to close this race. */ spin_lock_irq(&task->sighand->siglock); if (task->state == __TASK_TRACED) { if (__fatal_signal_pending(task)) wake_up_state(task, __TASK_TRACED); else task->state = TASK_TRACED; } spin_unlock_irq(&task->sighand->siglock); } /** * ptrace_check_attach - check whether ptracee is ready for ptrace operation * @child: ptracee to check for * @ignore_state: don't check whether @child is currently %TASK_TRACED * * Check whether @child is being ptraced by %current and ready for further * ptrace operations. If @ignore_state is %false, @child also should be in * %TASK_TRACED state and on return the child is guaranteed to be traced * and not executing. If @ignore_state is %true, @child can be in any * state. * * CONTEXT: * Grabs and releases tasklist_lock and @child->sighand->siglock. * * RETURNS: * 0 on success, -ESRCH if %child is not ready. */ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) { int ret = -ESRCH; /* * We take the read lock around doing both checks to close a * possible race where someone else was tracing our child and * detached between these two checks. After this locked check, * we are sure that this is our traced child and that can only * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ if (ignore_state || ptrace_freeze_traced(child)) ret = 0; } read_unlock(&tasklist_lock); if (!ret && !ignore_state) { if (!wait_task_inactive(child, __TASK_TRACED)) { /* * This can only happen if may_ptrace_stop() fails and * ptrace_stop() changes ->state back to TASK_RUNNING, * so we should not worry about leaking __TASK_TRACED. */ WARN_ON(child->state == __TASK_TRACED); ret = -ESRCH; } } return ret; } static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { if (mode & PTRACE_MODE_NOAUDIT) return ns_capable_noaudit(ns, CAP_SYS_PTRACE); return ns_capable(ns, CAP_SYS_PTRACE); } /* Returns 0 on success, -errno on denial. */ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) { WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n"); return -EPERM; } /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. * * ptrace_attach denies several cases that /proc allows * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ /* Don't let security modules deny introspection */ if (same_thread_group(task, current)) return 0; rcu_read_lock(); if (mode & PTRACE_MODE_FSCREDS) { caller_uid = cred->fsuid; caller_gid = cred->fsgid; } else { /* * Using the euid would make more sense here, but something * in userland might rely on the old behavior, and this * shouldn't be a security problem since * PTRACE_MODE_REALCREDS implies that the caller explicitly * used a syscall that requests access to another process * (and not a filesystem syscall to procfs). */ caller_uid = cred->uid; caller_gid = cred->gid; } tcred = __task_cred(task); if (uid_eq(caller_uid, tcred->euid) && uid_eq(caller_uid, tcred->suid) && uid_eq(caller_uid, tcred->uid) && gid_eq(caller_gid, tcred->egid) && gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; ok: rcu_read_unlock(); /* * If a task drops privileges and becomes nondumpable (through a syscall * like setresuid()) while we are trying to access it, we must ensure * that the dumpability is read after the credentials; otherwise, * we may be able to attach to a task that we shouldn't be able to * attach to (as if the task had dropped privileges without becoming * nondumpable). * Pairs with a write barrier in commit_creds(). */ smp_rmb(); mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); } bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; task_lock(task); err = __ptrace_may_access(task, mode); task_unlock(task); return !err; } static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) { bool seize = (request == PTRACE_SEIZE); int retval; retval = -EIO; if (seize) { if (addr != 0) goto out; if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; } audit_ptrace(task); retval = -EPERM; if (unlikely(task->flags & PF_KTHREAD)) goto out; if (same_thread_group(task, current)) goto out; /* * Protect exec's credential calculations against our interference; * SUID, SGID and LSM creds get determined differently * under ptrace. */ retval = -ERESTARTNOINTR; if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); task_unlock(task); if (retval) goto unlock_creds; write_lock_irq(&tasklist_lock); retval = -EPERM; if (unlikely(task->exit_state)) goto unlock_tasklist; if (task->ptrace) goto unlock_tasklist; if (seize) flags |= PT_SEIZED; task->ptrace = flags; ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); spin_lock(&task->sighand->siglock); /* * If the task is already STOPPED, set JOBCTL_TRAP_STOP and * TRAPPING, and kick it so that it transits to TRACED. TRAPPING * will be cleared if the child completes the transition or any * event which clears the group stop states happens. We'll wait * for the transition to complete before returning from this * function. * * This hides STOPPED -> RUNNING -> TRACED transition from the * attaching thread but a different thread in the same group can * still observe the transient RUNNING state. IOW, if another * thread's WNOHANG wait(2) on the stopped tracee races against * ATTACH, the wait(2) may fail due to the transient RUNNING. * * The following task_is_stopped() test is safe as both transitions * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) signal_wake_up_state(task, __TASK_STOPPED); spin_unlock(&task->sighand->siglock); retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: if (!retval) { /* * We do not bother to change retval or clear JOBCTL_TRAPPING * if wait_on_bit() was interrupted by SIGKILL. The tracer will * not return to user-mode, it will exit and clear this bit in * __ptrace_unlink() if it wasn't already cleared by the tracee; * and until then nobody can ptrace this task. */ wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); proc_ptrace_connector(task, PTRACE_ATTACH); } return retval; } /** * ptrace_traceme -- helper for PTRACE_TRACEME * * Performs checks and sets PT_PTRACED. * Should be used by all ptrace implementations for PTRACE_TRACEME. */ static int ptrace_traceme(void) { int ret = -EPERM; write_lock_irq(&tasklist_lock); /* Are we already being traced? */ if (!current->ptrace) { ret = security_ptrace_traceme(current->parent); /* * Check PF_EXITING to ensure ->real_parent has not passed * exit_ptrace(). Otherwise we don't report the error but * pretend ->real_parent untraces us right after return. */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); return ret; } /* * Called with irqs disabled, returns true if childs should reap themselves. */ static int ignoring_children(struct sighand_struct *sigh) { int ret; spin_lock(&sigh->siglock); ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) || (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT); spin_unlock(&sigh->siglock); return ret; } /* * Called with tasklist_lock held for writing. * Unlink a traced task, and clean it up if it was a traced zombie. * Return true if it needs to be reaped with release_task(). * (We can't call release_task() here because we already hold tasklist_lock.) * * If it's a zombie, our attachedness prevented normal parent notification * or self-reaping. Do notification now if it would have happened earlier. * If it should reap itself, return true. * * If it's our own child, there is no notification to do. But if our normal * children self-reap, then this child was prevented by ptrace and we must * reap it now, in that case we must also wake up sub-threads sleeping in * do_wait(). */ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) { bool dead; __ptrace_unlink(p); if (p->exit_state != EXIT_ZOMBIE) return false; dead = !thread_group_leader(p); if (!dead && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, tracer)) dead = do_notify_parent(p, p->exit_signal); else if (ignoring_children(tracer->sighand)) { __wake_up_parent(p, tracer); dead = true; } } /* Mark it as in the process of being reaped. */ if (dead) p->exit_state = EXIT_DEAD; return dead; } static int ptrace_detach(struct task_struct *child, unsigned int data) { if (!valid_signal(data)) return -EIO; /* Architecture-specific hardware disable .. */ ptrace_disable(child); write_lock_irq(&tasklist_lock); /* * We rely on ptrace_freeze_traced(). It can't be killed and * untraced by another thread, it can't be a zombie. */ WARN_ON(!child->ptrace || child->exit_state); /* * tasklist_lock avoids the race with wait_task_stopped(), see * the comment in ptrace_resume(). */ child->exit_code = data; __ptrace_detach(current, child); write_unlock_irq(&tasklist_lock); proc_ptrace_connector(child, PTRACE_DETACH); return 0; } /* * Detach all tasks we were using ptrace on. Called with tasklist held * for writing. */ void exit_ptrace(struct task_struct *tracer, struct list_head *dead) { struct task_struct *p, *n; list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { if (unlikely(p->ptrace & PT_EXITKILL)) send_sig_info(SIGKILL, SEND_SIG_PRIV, p); if (__ptrace_detach(tracer, p)) list_add(&p->ptrace_entry, dead); } } int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) { int copied = 0; while (len > 0) { char buf[128]; int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); if (!retval) { if (copied) break; return -EIO; } if (copy_to_user(dst, buf, retval)) return -EFAULT; copied += retval; src += retval; dst += retval; len -= retval; } return copied; } int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len) { int copied = 0; while (len > 0) { char buf[128]; int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) break; return -EIO; } copied += retval; src += retval; dst += retval; len -= retval; } return copied; } static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; if (data & ~(unsigned long)PTRACE_O_MASK) return -EINVAL; if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || !IS_ENABLED(CONFIG_SECCOMP)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || current->ptrace & PT_SUSPEND_SECCOMP) return -EPERM; } /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); flags |= (data << PT_OPT_FLAG_SHIFT); child->ptrace = flags; return 0; } static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info) { unsigned long flags; int error = -ESRCH; if (lock_task_sighand(child, &flags)) { error = -EINVAL; if (likely(child->last_siginfo != NULL)) { copy_siginfo(info, child->last_siginfo); error = 0; } unlock_task_sighand(child, &flags); } return error; } static int ptrace_setsiginfo(struct task_struct *child, const kernel_siginfo_t *info) { unsigned long flags; int error = -ESRCH; if (lock_task_sighand(child, &flags)) { error = -EINVAL; if (likely(child->last_siginfo != NULL)) { copy_siginfo(child->last_siginfo, info); error = 0; } unlock_task_sighand(child, &flags); } return error; } static int ptrace_peek_siginfo(struct task_struct *child, unsigned long addr, unsigned long data) { struct ptrace_peeksiginfo_args arg; struct sigpending *pending; struct sigqueue *q; int ret, i; ret = copy_from_user(&arg, (void __user *) addr, sizeof(struct ptrace_peeksiginfo_args)); if (ret) return -EFAULT; if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED) return -EINVAL; /* unknown flags */ if (arg.nr < 0) return -EINVAL; /* Ensure arg.off fits in an unsigned long */ if (arg.off > ULONG_MAX) return 0; if (arg.flags & PTRACE_PEEKSIGINFO_SHARED) pending = &child->signal->shared_pending; else pending = &child->pending; for (i = 0; i < arg.nr; ) { kernel_siginfo_t info; unsigned long off = arg.off + i; bool found = false; spin_lock_irq(&child->sighand->siglock); list_for_each_entry(q, &pending->list, list) { if (!off--) { found = true; copy_siginfo(&info, &q->info); break; } } spin_unlock_irq(&child->sighand->siglock); if (!found) /* beyond the end of the list */ break; #ifdef CONFIG_COMPAT if (unlikely(in_compat_syscall())) { compat_siginfo_t __user *uinfo = compat_ptr(data); if (copy_siginfo_to_user32(uinfo, &info)) { ret = -EFAULT; break; } } else #endif { siginfo_t __user *uinfo = (siginfo_t __user *) data; if (copy_siginfo_to_user(uinfo, &info)) { ret = -EFAULT; break; } } data += sizeof(siginfo_t); i++; if (signal_pending(current)) break; cond_resched(); } if (i > 0) return i; return ret; } #ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) #else #define is_singlestep(request) 0 #endif #ifdef PTRACE_SINGLEBLOCK #define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) #else #define is_singleblock(request) 0 #endif #ifdef PTRACE_SYSEMU #define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP) #else #define is_sysemu_singlestep(request) 0 #endif static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { bool need_siglock; if (!valid_signal(data)) return -EIO; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP) set_tsk_thread_flag(child, TIF_SYSCALL_EMU); else clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif if (is_singleblock(request)) { if (unlikely(!arch_has_block_step())) return -EIO; user_enable_block_step(child); } else if (is_singlestep(request) || is_sysemu_singlestep(request)) { if (unlikely(!arch_has_single_step())) return -EIO; user_enable_single_step(child); } else { user_disable_single_step(child); } /* * Change ->exit_code and ->state under siglock to avoid the race * with wait_task_stopped() in between; a non-zero ->exit_code will * wrongly look like another report from tracee. * * Note that we need siglock even if ->exit_code == data and/or this * status was not reported yet, the new status must not be cleared by * wait_task_stopped() after resume. * * If data == 0 we do not care if wait_task_stopped() reports the old * status and clears the code too; this can't race with the tracee, it * takes siglock after resume. */ need_siglock = data && !thread_group_empty(current); if (need_siglock) spin_lock_irq(&child->sighand->siglock); child->exit_code = data; wake_up_state(child, __TASK_TRACED); if (need_siglock) spin_unlock_irq(&child->sighand->siglock); return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static const struct user_regset * find_regset(const struct user_regset_view *view, unsigned int type) { const struct user_regset *regset; int n; for (n = 0; n < view->n; ++n) { regset = view->regsets + n; if (regset->core_note_type == type) return regset; } return NULL; } static int ptrace_regset(struct task_struct *task, int req, unsigned int type, struct iovec *kiov) { const struct user_regset_view *view = task_user_regset_view(task); const struct user_regset *regset = find_regset(view, type); int regset_no; if (!regset || (kiov->iov_len % regset->size) != 0) return -EINVAL; regset_no = regset - view->regsets; kiov->iov_len = min(kiov->iov_len, (__kernel_size_t) (regset->n * regset->size)); if (req == PTRACE_GETREGSET) return copy_regset_to_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); else return copy_regset_from_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); } /* * This is declared in linux/regset.h and defined in machine-dependent * code. We put the export here, near the primary machine-neutral use, * to ensure no machine forgets it. */ EXPORT_SYMBOL_GPL(task_user_regset_view); static unsigned long ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { unsigned long args[ARRAY_SIZE(info->entry.args)]; int i; info->op = PTRACE_SYSCALL_INFO_ENTRY; info->entry.nr = syscall_get_nr(child, regs); syscall_get_arguments(child, regs, args); for (i = 0; i < ARRAY_SIZE(args); i++) info->entry.args[i] = args[i]; /* args is the last field in struct ptrace_syscall_info.entry */ return offsetofend(struct ptrace_syscall_info, entry.args); } static unsigned long ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { /* * As struct ptrace_syscall_info.entry is currently a subset * of struct ptrace_syscall_info.seccomp, it makes sense to * initialize that subset using ptrace_get_syscall_info_entry(). * This can be reconsidered in the future if these structures * diverge significantly enough. */ ptrace_get_syscall_info_entry(child, regs, info); info->op = PTRACE_SYSCALL_INFO_SECCOMP; info->seccomp.ret_data = child->ptrace_message; /* ret_data is the last field in struct ptrace_syscall_info.seccomp */ return offsetofend(struct ptrace_syscall_info, seccomp.ret_data); } static unsigned long ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { info->op = PTRACE_SYSCALL_INFO_EXIT; info->exit.rval = syscall_get_error(child, regs); info->exit.is_error = !!info->exit.rval; if (!info->exit.is_error) info->exit.rval = syscall_get_return_value(child, regs); /* is_error is the last field in struct ptrace_syscall_info.exit */ return offsetofend(struct ptrace_syscall_info, exit.is_error); } static int ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, void __user *datavp) { struct pt_regs *regs = task_pt_regs(child); struct ptrace_syscall_info info = { .op = PTRACE_SYSCALL_INFO_NONE, .arch = syscall_get_arch(child), .instruction_pointer = instruction_pointer(regs), .stack_pointer = user_stack_pointer(regs), }; unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); unsigned long write_size; /* * This does not need lock_task_sighand() to access * child->last_siginfo because ptrace_freeze_traced() * called earlier by ptrace_check_attach() ensures that * the tracee cannot go away and clear its last_siginfo. */ switch (child->last_siginfo ? child->last_siginfo->si_code : 0) { case SIGTRAP | 0x80: switch (child->ptrace_message) { case PTRACE_EVENTMSG_SYSCALL_ENTRY: actual_size = ptrace_get_syscall_info_entry(child, regs, &info); break; case PTRACE_EVENTMSG_SYSCALL_EXIT: actual_size = ptrace_get_syscall_info_exit(child, regs, &info); break; } break; case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8): actual_size = ptrace_get_syscall_info_seccomp(child, regs, &info); break; } write_size = min(actual_size, user_size); return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data) { bool seized = child->ptrace & PT_SEIZED; int ret = -EIO; kernel_siginfo_t siginfo, *si; void __user *datavp = (void __user *) data; unsigned long __user *datalp = datavp; unsigned long flags; switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: return generic_ptrace_peekdata(child, addr, data); case PTRACE_POKETEXT: case PTRACE_POKEDATA: return generic_ptrace_pokedata(child, addr, data); #ifdef PTRACE_OLDSETOPTIONS case PTRACE_OLDSETOPTIONS: #endif case PTRACE_SETOPTIONS: ret = ptrace_setoptions(child, data); break; case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message, datalp); break; case PTRACE_PEEKSIGINFO: ret = ptrace_peek_siginfo(child, addr, data); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) ret = copy_siginfo_to_user(datavp, &siginfo); break; case PTRACE_SETSIGINFO: ret = copy_siginfo_from_user(&siginfo, datavp); if (!ret) ret = ptrace_setsiginfo(child, &siginfo); break; case PTRACE_GETSIGMASK: { sigset_t *mask; if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } if (test_tsk_restore_sigmask(child)) mask = &child->saved_sigmask; else mask = &child->blocked; if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; } case PTRACE_SETSIGMASK: { sigset_t new_set; if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } if (copy_from_user(&new_set, datavp, sizeof(sigset_t))) { ret = -EFAULT; break; } sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); /* * Every thread does recalc_sigpending() after resume, so * retarget_shared_pending() and recalc_sigpending() are not * called here. */ spin_lock_irq(&child->sighand->siglock); child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); clear_tsk_restore_sigmask(child); ret = 0; break; } case PTRACE_INTERRUPT: /* * Stop tracee without any side-effect on signal or job * control. At least one trap is guaranteed to happen * after this request. If @child is already trapped, the * current trap is not disturbed and another trap will * happen after the current trap is ended with PTRACE_CONT. * * The actual trap might not be PTRACE_EVENT_STOP trap but * the pending condition is cleared regardless. */ if (unlikely(!seized || !lock_task_sighand(child, &flags))) break; /* * INTERRUPT doesn't disturb existing trap sans one * exception. If ptracer issued LISTEN for the current * STOP, this INTERRUPT should clear LISTEN and re-trap * tracee into STOP. */ if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); unlock_task_sighand(child, &flags); ret = 0; break; case PTRACE_LISTEN: /* * Listen for events. Tracee must be in STOP. It's not * resumed per-se but is not considered to be in TRACED by * wait(2) or ptrace(2). If an async event (e.g. group * stop state change) happens, tracee will enter STOP trap * again. Alternatively, ptracer can issue INTERRUPT to * finish listening and re-trap tracee into STOP. */ if (unlikely(!seized || !lock_task_sighand(child, &flags))) break; si = child->last_siginfo; if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) { child->jobctl |= JOBCTL_LISTENING; /* * If NOTIFY is set, it means event happened between * start of this trap and now. Trigger re-trap. */ if (child->jobctl & JOBCTL_TRAP_NOTIFY) ptrace_signal_wake_up(child, true); ret = 0; } unlock_task_sighand(child, &flags); break; case PTRACE_DETACH: /* detach a process that was attached. */ ret = ptrace_detach(child, data); break; #ifdef CONFIG_BINFMT_ELF_FDPIC case PTRACE_GETFDPIC: { struct mm_struct *mm = get_task_mm(child); unsigned long tmp = 0; ret = -ESRCH; if (!mm) break; switch (addr) { case PTRACE_GETFDPIC_EXEC: tmp = mm->context.exec_fdpic_loadmap; break; case PTRACE_GETFDPIC_INTERP: tmp = mm->context.interp_fdpic_loadmap; break; default: break; } mmput(mm); ret = put_user(tmp, datalp); break; } #endif #ifdef PTRACE_SINGLESTEP case PTRACE_SINGLESTEP: #endif #ifdef PTRACE_SINGLEBLOCK case PTRACE_SINGLEBLOCK: #endif #ifdef PTRACE_SYSEMU case PTRACE_SYSEMU: case PTRACE_SYSEMU_SINGLESTEP: #endif case PTRACE_SYSCALL: case PTRACE_CONT: return ptrace_resume(child, request, data); case PTRACE_KILL: if (child->exit_state) /* already dead */ return 0; return ptrace_resume(child, request, SIGKILL); #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: case PTRACE_SETREGSET: { struct iovec kiov; struct iovec __user *uiov = datavp; if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(kiov.iov_base, &uiov->iov_base) || __get_user(kiov.iov_len, &uiov->iov_len)) return -EFAULT; ret = ptrace_regset(child, request, addr, &kiov); if (!ret) ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } case PTRACE_GET_SYSCALL_INFO: ret = ptrace_get_syscall_info(child, addr, datavp); break; #endif case PTRACE_SECCOMP_GET_FILTER: ret = seccomp_get_filter(child, addr, datavp); break; case PTRACE_SECCOMP_GET_METADATA: ret = seccomp_get_metadata(child, addr, datavp); break; default: break; } return ret; } #ifndef arch_ptrace_attach #define arch_ptrace_attach(child) do { } while (0) #endif SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, unsigned long, data) { struct task_struct *child; long ret; if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); if (!ret) arch_ptrace_attach(current); goto out; } child = find_get_task_by_vpid(pid); if (!child) { ret = -ESRCH; goto out; } if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); /* * Some architectures need to do book-keeping after * a ptrace attach. */ if (!ret) arch_ptrace_attach(child); goto out_put_task_struct; } ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); if (ret < 0) goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); if (ret || request != PTRACE_DETACH) ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); out: return ret; } int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long data) { unsigned long tmp; int copied; copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); } int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, unsigned long data) { int copied; copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } #if defined CONFIG_COMPAT int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data) { compat_ulong_t __user *datap = compat_ptr(data); compat_ulong_t word; kernel_siginfo_t siginfo; int ret; switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; else ret = put_user(word, datap); break; case PTRACE_POKETEXT: case PTRACE_POKEDATA: ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; case PTRACE_GETEVENTMSG: ret = put_user((compat_ulong_t) child->ptrace_message, datap); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) ret = copy_siginfo_to_user32( (struct compat_siginfo __user *) datap, &siginfo); break; case PTRACE_SETSIGINFO: ret = copy_siginfo_from_user32( &siginfo, (struct compat_siginfo __user *) datap); if (!ret) ret = ptrace_setsiginfo(child, &siginfo); break; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: case PTRACE_SETREGSET: { struct iovec kiov; struct compat_iovec __user *uiov = (struct compat_iovec __user *) datap; compat_uptr_t ptr; compat_size_t len; if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(ptr, &uiov->iov_base) || __get_user(len, &uiov->iov_len)) return -EFAULT; kiov.iov_base = compat_ptr(ptr); kiov.iov_len = len; ret = ptrace_regset(child, request, addr, &kiov); if (!ret) ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } #endif default: ret = ptrace_request(child, request, addr, data); } return ret; } COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, compat_long_t, addr, compat_long_t, data) { struct task_struct *child; long ret; if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); goto out; } child = find_get_task_by_vpid(pid); if (!child) { ret = -ESRCH; goto out; } if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); /* * Some architectures need to do book-keeping after * a ptrace attach. */ if (!ret) arch_ptrace_attach(child); goto out_put_task_struct; } ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); if (ret || request != PTRACE_DETACH) ptrace_unfreeze_traced(child); } out_put_task_struct: put_task_struct(child); out: return ret; } #endif /* CONFIG_COMPAT */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BYTEORDER_GENERIC_H #define _LINUX_BYTEORDER_GENERIC_H /* * linux/byteorder/generic.h * Generic Byte-reordering support * * The "... p" macros, like le64_to_cpup, can be used with pointers * to unaligned data, but there will be a performance penalty on * some architectures. Use get_unaligned for unaligned data. * * Francois-Rene Rideau <fare@tunes.org> 19970707 * gathered all the good ideas from all asm-foo/byteorder.h into one file, * cleaned them up. * I hope it is compliant with non-GCC compilers. * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h, * because I wasn't sure it would be ok to put it in types.h * Upgraded it to 2.1.43 * Francois-Rene Rideau <fare@tunes.org> 19971012 * Upgraded it to 2.1.57 * to please Linus T., replaced huge #ifdef's between little/big endian * by nestedly #include'd files. * Francois-Rene Rideau <fare@tunes.org> 19971205 * Made it to 2.1.71; now a facelift: * Put files under include/linux/byteorder/ * Split swab from generic support. * * TODO: * = Regular kernel maintainers could also replace all these manual * byteswap macros that remain, disseminated among drivers, * after some grep or the sources... * = Linus might want to rename all these macros and files to fit his taste, * to fit his personal naming scheme. * = it seems that a few drivers would also appreciate * nybble swapping support... * = every architecture could add their byteswap macro in asm/byteorder.h * see how some architectures already do (i386, alpha, ppc, etc) * = cpu_to_beXX and beXX_to_cpu might some day need to be well * distinguished throughout the kernel. This is not the case currently, * since little endian, big endian, and pdp endian machines needn't it. * But this might be the case for, say, a port of Linux to 20/21 bit * architectures (and F21 Linux addict around?). */ /* * The following macros are to be defined by <asm/byteorder.h>: * * Conversion of long and short int between network and host format * ntohl(__u32 x) * ntohs(__u16 x) * htonl(__u32 x) * htons(__u16 x) * It seems that some programs (which? where? or perhaps a standard? POSIX?) * might like the above to be functions, not macros (why?). * if that's true, then detect them, and take measures. * Anyway, the measure is: define only ___ntohl as a macro instead, * and in a separate file, have * unsigned long inline ntohl(x){return ___ntohl(x);} * * The same for constant arguments * __constant_ntohl(__u32 x) * __constant_ntohs(__u16 x) * __constant_htonl(__u32 x) * __constant_htons(__u16 x) * * Conversion of XX-bit integers (16- 32- or 64-) * between native CPU format and little/big endian format * 64-bit stuff only defined for proper architectures * cpu_to_[bl]eXX(__uXX x) * [bl]eXX_to_cpu(__uXX x) * * The same, but takes a pointer to the value to convert * cpu_to_[bl]eXXp(__uXX x) * [bl]eXX_to_cpup(__uXX x) * * The same, but change in situ * cpu_to_[bl]eXXs(__uXX x) * [bl]eXX_to_cpus(__uXX x) * * See asm-foo/byteorder.h for examples of how to provide * architecture-optimized versions * */ #define cpu_to_le64 __cpu_to_le64 #define le64_to_cpu __le64_to_cpu #define cpu_to_le32 __cpu_to_le32 #define le32_to_cpu __le32_to_cpu #define cpu_to_le16 __cpu_to_le16 #define le16_to_cpu __le16_to_cpu #define cpu_to_be64 __cpu_to_be64 #define be64_to_cpu __be64_to_cpu #define cpu_to_be32 __cpu_to_be32 #define be32_to_cpu __be32_to_cpu #define cpu_to_be16 __cpu_to_be16 #define be16_to_cpu __be16_to_cpu #define cpu_to_le64p __cpu_to_le64p #define le64_to_cpup __le64_to_cpup #define cpu_to_le32p __cpu_to_le32p #define le32_to_cpup __le32_to_cpup #define cpu_to_le16p __cpu_to_le16p #define le16_to_cpup __le16_to_cpup #define cpu_to_be64p __cpu_to_be64p #define be64_to_cpup __be64_to_cpup #define cpu_to_be32p __cpu_to_be32p #define be32_to_cpup __be32_to_cpup #define cpu_to_be16p __cpu_to_be16p #define be16_to_cpup __be16_to_cpup #define cpu_to_le64s __cpu_to_le64s #define le64_to_cpus __le64_to_cpus #define cpu_to_le32s __cpu_to_le32s #define le32_to_cpus __le32_to_cpus #define cpu_to_le16s __cpu_to_le16s #define le16_to_cpus __le16_to_cpus #define cpu_to_be64s __cpu_to_be64s #define be64_to_cpus __be64_to_cpus #define cpu_to_be32s __cpu_to_be32s #define be32_to_cpus __be32_to_cpus #define cpu_to_be16s __cpu_to_be16s #define be16_to_cpus __be16_to_cpus /* * They have to be macros in order to do the constant folding * correctly - if the argument passed into a inline function * it is no longer constant according to gcc.. */ #undef ntohl #undef ntohs #undef htonl #undef htons #define ___htonl(x) __cpu_to_be32(x) #define ___htons(x) __cpu_to_be16(x) #define ___ntohl(x) __be32_to_cpu(x) #define ___ntohs(x) __be16_to_cpu(x) #define htonl(x) ___htonl(x) #define ntohl(x) ___ntohl(x) #define htons(x) ___htons(x) #define ntohs(x) ___ntohs(x) static inline void le16_add_cpu(__le16 *var, u16 val) { *var = cpu_to_le16(le16_to_cpu(*var) + val); } static inline void le32_add_cpu(__le32 *var, u32 val) { *var = cpu_to_le32(le32_to_cpu(*var) + val); } static inline void le64_add_cpu(__le64 *var, u64 val) { *var = cpu_to_le64(le64_to_cpu(*var) + val); } /* XXX: this stuff can be optimized */ static inline void le32_to_cpu_array(u32 *buf, unsigned int words) { while (words--) { __le32_to_cpus(buf); buf++; } } static inline void cpu_to_le32_array(u32 *buf, unsigned int words) { while (words--) { __cpu_to_le32s(buf); buf++; } } static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); } static inline void be32_add_cpu(__be32 *var, u32 val) { *var = cpu_to_be32(be32_to_cpu(*var) + val); } static inline void be64_add_cpu(__be64 *var, u64 val) { *var = cpu_to_be64(be64_to_cpu(*var) + val); } static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = cpu_to_be32(src[i]); } static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = be32_to_cpu(src[i]); } #endif /* _LINUX_BYTEORDER_GENERIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UNWIND_H #define _ASM_X86_UNWIND_H #include <linux/sched.h> #include <linux/ftrace.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) #define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; struct task_struct *task; int graph_idx; bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; /* * If non-NULL: The current frame is incomplete and doesn't contain a * valid BP. When looking for the next frame, use this instead of the * non-existent saved BP. */ unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; #endif }; void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } static inline bool unwind_error(struct unwind_state *state) { return state->error; } static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { first_frame = first_frame ? : get_stack_pointer(task, regs); __unwind_start(state, task, regs, first_frame); } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* * If 'partial' returns true, only the iret frame registers are valid. */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { if (unwind_done(state)) return NULL; if (partial) { #ifdef CONFIG_UNWINDER_ORC *partial = !state->full_regs; #else *partial = false; #endif } return state->regs; } #else static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { return NULL; } #endif #ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); #else static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H /* * Determines how hard direct compaction should try to succeed. * Lower value means higher priority, analogically to reclaim priority. */ enum compact_priority { COMPACT_PRIO_SYNC_FULL, MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_FULL, COMPACT_PRIO_SYNC_LIGHT, MIN_COMPACT_COSTLY_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, COMPACT_PRIO_ASYNC, INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC }; /* Return values for compact_zone() and try_to_compact_pages() */ /* When adding new states, please adjust include/trace/events/compaction.h */ enum compact_result { /* For more detailed tracepoint output - internal to compaction */ COMPACT_NOT_SUITABLE_ZONE, /* * compaction didn't start as it was not possible or direct reclaim * was more suitable */ COMPACT_SKIPPED, /* compaction didn't start as it was deferred due to past failures */ COMPACT_DEFERRED, /* For more detailed tracepoint output - internal to compaction */ COMPACT_NO_SUITABLE_PAGE, /* compaction should continue to another pageblock */ COMPACT_CONTINUE, /* * The full zone was compacted scanned but wasn't successfull to compact * suitable pages. */ COMPACT_COMPLETE, /* * direct compaction has scanned part of the zone but wasn't successfull * to compact suitable pages. */ COMPACT_PARTIAL_SKIPPED, /* compaction terminated prematurely due to lock contentions */ COMPACT_CONTENDED, /* * direct compaction terminated after concluding that the allocation * should now succeed */ COMPACT_SUCCESS, }; struct alloc_context; /* in mm/internal.h */ /* * Number of free order-0 pages that should be available above given watermark * to make sure compaction has reasonable chance of not running out of free * pages that it needs to isolate as migration target during its work. */ static inline unsigned long compact_gap(unsigned int order) { /* * Although all the isolations for migration are temporary, compaction * free scanner may have up to 1 << order pages on its list and then * try to split an (order - 1) free page. At that point, a gap of * 1 << order might not be enough, so it's safer to require twice that * amount. Note that the number of pages on the list is also * effectively limited by COMPACT_CLUSTER_MAX, as that's the maximum * that the migrate scanner can have isolated on migrate list, and free * scanner is only invoked when the number of isolated free pages is * lower than that. But it's not worth to complicate the formula here * as a bigger gap for higher orders than strictly necessary can also * improve chances of compaction success. */ return 2UL << order; } #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int highest_zoneidx); extern void defer_compaction(struct zone *zone, int order); extern bool compaction_deferred(struct zone *zone, int order); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); extern bool compaction_restarting(struct zone *zone, int order); /* Compaction has made some progress and retrying makes sense */ static inline bool compaction_made_progress(enum compact_result result) { /* * Even though this might sound confusing this in fact tells us * that the compaction successfully isolated and migrated some * pageblocks. */ if (result == COMPACT_SUCCESS) return true; return false; } /* Compaction has failed and it doesn't make much sense to keep retrying. */ static inline bool compaction_failed(enum compact_result result) { /* All zones were scanned completely and still not result. */ if (result == COMPACT_COMPLETE) return true; return false; } /* Compaction needs reclaim to be performed first, so it can continue. */ static inline bool compaction_needs_reclaim(enum compact_result result) { /* * Compaction backed off due to watermark checks for order-0 * so the regular reclaim has to try harder and reclaim something. */ if (result == COMPACT_SKIPPED) return true; return false; } /* * Compaction has backed off for some reason after doing some work or none * at all. It might be throttling or lock contention. Retrying might be still * worthwhile, but with a higher priority if allowed. */ static inline bool compaction_withdrawn(enum compact_result result) { /* * If compaction is deferred for high-order allocations, it is * because sync compaction recently failed. If this is the case * and the caller requested a THP allocation, we do not want * to heavily disrupt the system, so we fail the allocation * instead of entering direct reclaim. */ if (result == COMPACT_DEFERRED) return true; /* * If compaction in async mode encounters contention or blocks higher * priority task we back off early rather than cause stalls. */ if (result == COMPACT_CONTENDED) return true; /* * Page scanners have met but we haven't scanned full zones so this * is a back off in fact. */ if (result == COMPACT_PARTIAL_SKIPPED) return true; return false; } bool compaction_zonelist_suitable(struct alloc_context *ac, int order, int alloc_flags); extern int kcompactd_run(int nid); extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx); #else static inline void reset_isolation_suitable(pg_data_t *pgdat) { } static inline enum compact_result compaction_suitable(struct zone *zone, int order, int alloc_flags, int highest_zoneidx) { return COMPACT_SKIPPED; } static inline void defer_compaction(struct zone *zone, int order) { } static inline bool compaction_deferred(struct zone *zone, int order) { return true; } static inline bool compaction_made_progress(enum compact_result result) { return false; } static inline bool compaction_failed(enum compact_result result) { return false; } static inline bool compaction_needs_reclaim(enum compact_result result) { return false; } static inline bool compaction_withdrawn(enum compact_result result) { return true; } static inline int kcompactd_run(int nid) { return 0; } static inline void kcompactd_stop(int nid) { } static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx) { } #endif /* CONFIG_COMPACTION */ struct node; #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) extern int compaction_register_node(struct node *node); extern void compaction_unregister_node(struct node *node); #else static inline int compaction_register_node(struct node *node) { return 0; } static inline void compaction_unregister_node(struct node *node) { } #endif /* CONFIG_COMPACTION && CONFIG_SYSFS && CONFIG_NUMA */ #endif /* _LINUX_COMPACTION_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2019 Google LLC */ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H #include <linux/types.h> enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_INVALID, BLK_ENCRYPTION_MODE_AES_256_XTS, BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, BLK_ENCRYPTION_MODE_ADIANTUM, BLK_ENCRYPTION_MODE_MAX, }; #define BLK_CRYPTO_MAX_KEY_SIZE 64 /** * struct blk_crypto_config - an inline encryption key's crypto configuration * @crypto_mode: encryption algorithm this key is for * @data_unit_size: the data unit size for all encryption/decryptions with this * key. This is the size in bytes of each individual plaintext and * ciphertext. This is always a power of 2. It might be e.g. the * filesystem block size or the disk sector size. * @dun_bytes: the maximum number of bytes of DUN used when using this key */ struct blk_crypto_config { enum blk_crypto_mode_num crypto_mode; unsigned int data_unit_size; unsigned int dun_bytes; }; /** * struct blk_crypto_key - an inline encryption key * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this * key * @data_unit_size_bits: log2 of data_unit_size * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) * @raw: the raw bytes of this key. Only the first @size bytes are used. * * A blk_crypto_key is immutable once created, and many bios can reference it at * the same time. It must not be freed until all bios using it have completed * and it has been evicted from all devices on which it may have been used. */ struct blk_crypto_key { struct blk_crypto_config crypto_cfg; unsigned int data_unit_size_bits; unsigned int size; u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 #define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE / sizeof(u64)) /** * struct bio_crypt_ctx - an inline encryption context * @bc_key: the key, algorithm, and data unit size to use * @bc_dun: the data unit number (starting IV) to use * * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for * write requests) or decrypted (for read requests) inline by the storage device * or controller, or by the crypto API fallback. */ struct bio_crypt_ctx { const struct blk_crypto_key *bc_key; u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; }; #include <linux/blk_types.h> #include <linux/blkdev.h> struct request; struct request_queue; #ifdef CONFIG_BLK_INLINE_ENCRYPTION static inline bool bio_has_crypt_ctx(struct bio *bio) { return bio->bi_crypt_context; } void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key, const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], gfp_t gfp_mask); bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, unsigned int bytes, const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size); int blk_crypto_start_using_key(const struct blk_crypto_key *key, struct request_queue *q); int blk_crypto_evict_key(struct request_queue *q, const struct blk_crypto_key *key); bool blk_crypto_config_supported(struct request_queue *q, const struct blk_crypto_config *cfg); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_has_crypt_ctx(struct bio *bio) { return false; } #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); /** * bio_crypt_clone - clone bio encryption context * @dst: destination bio * @src: source bio * @gfp_mask: memory allocation flags * * If @src has an encryption context, clone it to @dst. * * Return: 0 on success, -ENOMEM if out of memory. -ENOMEM is only possible if * @gfp_mask doesn't include %__GFP_DIRECT_RECLAIM. */ static inline int bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) { if (bio_has_crypt_ctx(src)) return __bio_crypt_clone(dst, src, gfp_mask); return 0; } #endif /* __LINUX_BLK_CRYPTO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_WAIT_BIT_H #define _LINUX_WAIT_BIT_H /* * Linux wait-bit related types and methods: */ #include <linux/wait.h> struct wait_bit_key { void *flags; int bit_nr; unsigned long timeout; }; struct wait_bit_queue_entry { struct wait_bit_key key; struct wait_queue_entry wq_entry; }; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); void wake_up_bit(void *word, int bit); int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue_entry name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ .wq_entry = { \ .private = current, \ .func = wake_bit_function, \ .entry = \ LIST_HEAD_INIT((name).wq_entry.entry), \ }, \ } extern int bit_wait(struct wait_bit_key *key, int mode); extern int bit_wait_io(struct wait_bit_key *key, int mode); extern int bit_wait_timeout(struct wait_bit_key *key, int mode); extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @mode: the task state to sleep in * * There is a standard hashed waitqueue table for generic use. This * is the part of the hashtable's accessor API that waits on a bit. * For instance, if one were to have waiters on a bitflag, one would * call wait_on_bit() in threads waiting for the bit to clear. * One uses wait_on_bit() where one is waiting for the bit to clear, * but has no intention of setting it. * Returned value will be zero if the bit was cleared, or non-zero * if the process received a signal and the mode permitted wakeup * on that signal. */ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, mode); } /** * wait_on_bit_io - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @mode: the task state to sleep in * * Use the standard hashed waitqueue table to wait for a bit * to be cleared. This is similar to wait_on_bit(), but calls * io_schedule() instead of schedule() for the actual waiting. * * Returned value will be zero if the bit was cleared, or non-zero * if the process received a signal and the mode permitted wakeup * on that signal. */ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, mode); } /** * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @mode: the task state to sleep in * @timeout: timeout, in jiffies * * Use the standard hashed waitqueue table to wait for a bit * to be cleared. This is similar to wait_on_bit(), except also takes a * timeout parameter. * * Returned value will be zero if the bit was cleared before the * @timeout elapsed, or non-zero if the @timeout elapsed or process * received a signal and the mode permitted wakeup on that signal. */ static inline int wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); if (!test_bit(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, mode, timeout); } /** * wait_on_bit_action - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * * Use the standard hashed waitqueue table to wait for a bit * to be cleared, and allow the waiting action to be specified. * This is like wait_on_bit() but allows fine control of how the waiting * is done. * * Returned value will be zero if the bit was cleared, or non-zero * if the process received a signal and the mode permitted wakeup * on that signal. */ static inline int wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } /** * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @mode: the task state to sleep in * * There is a standard hashed waitqueue table for generic use. This * is the part of the hashtable's accessor API that waits on a bit * when one intends to set it, for instance, trying to lock bitflags. * For instance, if one were to have waiters trying to set bitflag * and waiting for it to clear before setting it, one would call * wait_on_bit() in threads waiting to be able to set the bit. * One uses wait_on_bit_lock() where one is waiting for the bit to * clear with the intention of setting it, and when done, clearing it. * * Returns zero if the bit was (eventually) found to be clear and was * set. Returns non-zero if a signal was delivered to the process and * the @mode allows that signal to wake the process. */ static inline int wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) return 0; return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); } /** * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @mode: the task state to sleep in * * Use the standard hashed waitqueue table to wait for a bit * to be cleared and then to atomically set it. This is similar * to wait_on_bit(), but calls io_schedule() instead of schedule() * for the actual waiting. * * Returns zero if the bit was (eventually) found to be clear and was * set. Returns non-zero if a signal was delivered to the process and * the @mode allows that signal to wake the process. */ static inline int wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) return 0; return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); } /** * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * * Use the standard hashed waitqueue table to wait for a bit * to be cleared and then to set it, and allow the waiting action * to be specified. * This is like wait_on_bit() but allows fine control of how the waiting * is done. * * Returns zero if the bit was (eventually) found to be clear and was * set. Returns non-zero if a signal was delivered to the process and * the @mode allows that signal to wake the process. */ static inline int wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) return 0; return out_of_line_wait_on_bit_lock(word, bit, action, mode); } extern void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int flags); extern void wake_up_var(void *var); extern wait_queue_head_t *__var_waitqueue(void *p); #define ___wait_var_event(var, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ struct wait_queue_head *__wq_head = __var_waitqueue(var); \ struct wait_bit_queue_entry __wbq_entry; \ long __ret = ret; /* explicit shadow */ \ \ init_wait_var_entry(&__wbq_entry, var, \ exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ for (;;) { \ long __int = prepare_to_wait_event(__wq_head, \ &__wbq_entry.wq_entry, \ state); \ if (condition) \ break; \ \ if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ goto __out; \ } \ \ cmd; \ } \ finish_wait(__wq_head, &__wbq_entry.wq_entry); \ __out: __ret; \ }) #define __wait_var_event(var, condition) \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) #define wait_var_event(var, condition) \ do { \ might_sleep(); \ if (condition) \ break; \ __wait_var_event(var, condition); \ } while (0) #define __wait_var_event_killable(var, condition) \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) #define wait_var_event_killable(var, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_var_event_killable(var, condition); \ __ret; \ }) #define __wait_var_event_timeout(var, condition, timeout) \ ___wait_var_event(var, ___wait_cond_timeout(condition), \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) #define wait_var_event_timeout(var, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_var_event_timeout(var, condition, timeout); \ __ret; \ }) #define __wait_var_event_interruptible(var, condition) \ ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) #define wait_var_event_interruptible(var, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_var_event_interruptible(var, condition); \ __ret; \ }) /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit * * @bit: the bit of the word being waited on * @word: the word being waited on, a kernel virtual address * * You can use this helper if bitflags are manipulated atomically rather than * non-atomically under a lock. */ static inline void clear_and_wake_up_bit(int bit, void *word) { clear_bit_unlock(bit, word); /* See wake_up_bit() for which memory barrier you need to use. */ smp_mb__after_atomic(); wake_up_bit(word, bit); } #endif /* _LINUX_WAIT_BIT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H #include <linux/backing-dev.h> /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced * * Session flags reflect options selected by users at mount time */ #define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \ V9FS_ACCESS_USER | \ V9FS_ACCESS_CLIENT) #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { V9FS_PROTO_2000U = 0x01, V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_CLIENT = 0x10, V9FS_POSIX_ACL = 0x20 }; /* possible values of ->cache */ /** * enum p9_cache_modes - user specified cache preferences * @CACHE_NONE: do not cache data, dentries, or directory contents (default) * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency * * eventually support loose, tight, time, session, default always none */ enum p9_cache_modes { CACHE_NONE, CACHE_MMAP, CACHE_LOOSE, CACHE_FSCACHE, nr__p9_cache_modes }; /** * struct v9fs_session_info - per-instance session information * @flags: session options of type &p9_session_flags * @nodev: set to 1 to disable device mapping * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_modes * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy * @maxdata: maximum data to be sent/recvd per protocol message * @dfltuid: default numeric userid to mount hierarchy as * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . * * Bugs: there seems to be a lot of state which could be condensed and/or * removed. */ struct v9fs_session_info { /* options */ unsigned char flags; unsigned char nodev; unsigned short debug; unsigned int afid; unsigned int cache; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_cookie *fscache; #endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ unsigned int maxdata; /* max data for client interface */ kuid_t dfltuid; /* default uid/muid for legacy support */ kgid_t dfltgid; /* default gid for legacy support */ kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 struct v9fs_inode { #ifdef CONFIG_9P_FSCACHE struct mutex fscache_lock; struct fscache_cookie *fscache; #endif struct p9_qid qid; unsigned int cache_validity; struct p9_fid *writeback_fid; struct mutex v_mutex; struct inode vfs_inode; }; static inline struct v9fs_inode *V9FS_I(const struct inode *inode) { return container_of(inode, struct v9fs_inode, vfs_inode); } extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, char *); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { return (inode->i_sb->s_fs_info); } static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) { return dentry->d_sb->s_fs_info; } static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; } static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000L; } /** * v9fs_get_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0); else return v9fs_inode_from_fid(v9ses, fid, sb, 0); } /** * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1); else return v9fs_inode_from_fid(v9ses, fid, sb, 1); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM vmscan #if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_VMSCAN_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/mm.h> #include <linux/memcontrol.h> #include <trace/events/mmflags.h> #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u #define RECLAIM_WB_MIXED 0x0010u #define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u #define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE) #define show_reclaim_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {RECLAIM_WB_ANON, "RECLAIM_WB_ANON"}, \ {RECLAIM_WB_FILE, "RECLAIM_WB_FILE"}, \ {RECLAIM_WB_MIXED, "RECLAIM_WB_MIXED"}, \ {RECLAIM_WB_SYNC, "RECLAIM_WB_SYNC"}, \ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" #define trace_reclaim_flags(file) ( \ (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ ) TRACE_EVENT(mm_vmscan_kswapd_sleep, TP_PROTO(int nid), TP_ARGS(nid), TP_STRUCT__entry( __field( int, nid ) ), TP_fast_assign( __entry->nid = nid; ), TP_printk("nid=%d", __entry->nid) ); TRACE_EVENT(mm_vmscan_kswapd_wake, TP_PROTO(int nid, int zid, int order), TP_ARGS(nid, zid, order), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; ), TP_printk("nid=%d order=%d", __entry->nid, __entry->order) ); TRACE_EVENT(mm_vmscan_wakeup_kswapd, TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags), TP_ARGS(nid, zid, order, gfp_flags), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) __field( gfp_t, gfp_flags ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags), TP_STRUCT__entry( __field( int, order ) __field( gfp_t, gfp_flags ) ), TP_fast_assign( __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("order=%d gfp_flags=%s", __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #endif /* CONFIG_MEMCG */ DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed), TP_STRUCT__entry( __field( unsigned long, nr_reclaimed ) ), TP_fast_assign( __entry->nr_reclaimed = nr_reclaimed; ), TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #endif /* CONFIG_MEMCG */ TRACE_EVENT(mm_shrink_slab_start, TP_PROTO(struct shrinker *shr, struct shrink_control *sc, long nr_objects_to_shrink, unsigned long cache_items, unsigned long long delta, unsigned long total_scan, int priority), TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan, priority), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(void *, shrink) __field(int, nid) __field(long, nr_objects_to_shrink) __field(gfp_t, gfp_flags) __field(unsigned long, cache_items) __field(unsigned long long, delta) __field(unsigned long, total_scan) __field(int, priority) ), TP_fast_assign( __entry->shr = shr; __entry->shrink = shr->scan_objects; __entry->nid = sc->nid; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; __entry->cache_items = cache_items; __entry->delta = delta; __entry->total_scan = total_scan; __entry->priority = priority; ), TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", __entry->shrink, __entry->shr, __entry->nid, __entry->nr_objects_to_shrink, show_gfp_flags(__entry->gfp_flags), __entry->cache_items, __entry->delta, __entry->total_scan, __entry->priority) ); TRACE_EVENT(mm_shrink_slab_end, TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval, long unused_scan_cnt, long new_scan_cnt, long total_scan), TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt, total_scan), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(int, nid) __field(void *, shrink) __field(long, unused_scan) __field(long, new_scan) __field(int, retval) __field(long, total_scan) ), TP_fast_assign( __entry->shr = shr; __entry->nid = nid; __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; __entry->total_scan = total_scan; ), TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", __entry->shrink, __entry->shr, __entry->nid, __entry->unused_scan, __entry->new_scan, __entry->total_scan, __entry->retval) ); TRACE_EVENT(mm_vmscan_lru_isolate, TP_PROTO(int highest_zoneidx, int order, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_skipped, unsigned long nr_taken, isolate_mode_t isolate_mode, int lru), TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), TP_STRUCT__entry( __field(int, highest_zoneidx) __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) __field(isolate_mode_t, isolate_mode) __field(int, lru) ), TP_fast_assign( __entry->highest_zoneidx = highest_zoneidx; __entry->order = order; __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; __entry->isolate_mode = isolate_mode; __entry->lru = lru; ), /* * classzone is previous name of the highest_zoneidx. * Reason not to change it is the ABI requirement of the tracepoint. */ TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->isolate_mode, __entry->highest_zoneidx, __entry->order, __entry->nr_requested, __entry->nr_scanned, __entry->nr_skipped, __entry->nr_taken, __print_symbolic(__entry->lru, LRU_NAMES)) ); TRACE_EVENT(mm_vmscan_writepage, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, reclaim_flags) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->reclaim_flags = trace_reclaim_flags( page_is_file_lru(page)); ), TP_printk("page=%p pfn=%lu flags=%s", pfn_to_page(__entry->pfn), __entry->pfn, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, unsigned long nr_scanned, unsigned long nr_reclaimed, struct reclaim_stat *stat, int priority, int file), TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_scanned) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, nr_congested) __field(unsigned long, nr_immediate) __field(unsigned int, nr_activate0) __field(unsigned int, nr_activate1) __field(unsigned long, nr_ref_keep) __field(unsigned long, nr_unmap_fail) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_dirty = stat->nr_dirty; __entry->nr_writeback = stat->nr_writeback; __entry->nr_congested = stat->nr_congested; __entry->nr_immediate = stat->nr_immediate; __entry->nr_activate0 = stat->nr_activate[0]; __entry->nr_activate1 = stat->nr_activate[1]; __entry->nr_ref_keep = stat->nr_ref_keep; __entry->nr_unmap_fail = stat->nr_unmap_fail; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s", __entry->nid, __entry->nr_scanned, __entry->nr_reclaimed, __entry->nr_dirty, __entry->nr_writeback, __entry->nr_congested, __entry->nr_immediate, __entry->nr_activate0, __entry->nr_activate1, __entry->nr_ref_keep, __entry->nr_unmap_fail, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_lru_shrink_active, TP_PROTO(int nid, unsigned long nr_taken, unsigned long nr_active, unsigned long nr_deactivated, unsigned long nr_referenced, int priority, int file), TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_taken) __field(unsigned long, nr_active) __field(unsigned long, nr_deactivated) __field(unsigned long, nr_referenced) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_taken = nr_taken; __entry->nr_active = nr_active; __entry->nr_deactivated = nr_deactivated; __entry->nr_referenced = nr_referenced; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s", __entry->nid, __entry->nr_taken, __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_inactive_list_is_low, TP_PROTO(int nid, int reclaim_idx, unsigned long total_inactive, unsigned long inactive, unsigned long total_active, unsigned long active, unsigned long ratio, int file), TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file), TP_STRUCT__entry( __field(int, nid) __field(int, reclaim_idx) __field(unsigned long, total_inactive) __field(unsigned long, inactive) __field(unsigned long, total_active) __field(unsigned long, active) __field(unsigned long, ratio) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->reclaim_idx = reclaim_idx; __entry->total_inactive = total_inactive; __entry->inactive = inactive; __entry->total_active = total_active; __entry->active = active; __entry->ratio = ratio; __entry->reclaim_flags = trace_reclaim_flags(file) & RECLAIM_WB_LRU; ), TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s", __entry->nid, __entry->reclaim_idx, __entry->total_inactive, __entry->inactive, __entry->total_active, __entry->active, __entry->ratio, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_node_reclaim_begin, TP_PROTO(int nid, int order, gfp_t gfp_flags), TP_ARGS(nid, order, gfp_flags), TP_STRUCT__entry( __field(int, nid) __field(int, order) __field(gfp_t, gfp_flags) ), TP_fast_assign( __entry->nid = nid; __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_MROUTE_H #define __LINUX_MROUTE_H #include <linux/in.h> #include <linux/pim.h> #include <net/fib_rules.h> #include <net/fib_notifier.h> #include <uapi/linux/mroute.h> #include <linux/mroute_base.h> #include <linux/sockptr.h> #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { return opt >= MRT_BASE && opt <= MRT_MAX; } int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, sockptr_t optval, unsigned int optlen) { return -ENOPROTOOPT; } static inline int ip_mroute_getsockopt(struct sock *sock, int optname, char __user *optval, int __user *optlen) { return -ENOPROTOOPT; } static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { return -ENOIOCTLCMD; } static inline int ip_mr_init(void) { return 0; } static inline int ip_mroute_opt(int opt) { return 0; } static inline bool ipmr_rule_default(const struct fib_rule *rule) { return true; } #endif #define VIFF_STATIC 0x8000 struct mfc_cache_cmp_arg { __be32 mfc_mcastgrp; __be32 mfc_origin; }; /** * struct mfc_cache - multicast routing entries * @_c: Common multicast routing information; has to be first [for casting] * @mfc_mcastgrp: destination multicast group address * @mfc_origin: source address * @cmparg: used for rhashtable comparisons */ struct mfc_cache { struct mr_mfc _c; union { struct { __be32 mfc_mcastgrp; __be32 mfc_origin; }; struct mfc_cache_cmp_arg cmparg; }; }; struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 /* This file is automatically generated. Do not edit. */ #ifndef _SELINUX_FLASK_H_ #define _SELINUX_FLASK_H_ #define SECCLASS_SECURITY 1 #define SECCLASS_PROCESS 2 #define SECCLASS_PROCESS2 3 #define SECCLASS_SYSTEM 4 #define SECCLASS_CAPABILITY 5 #define SECCLASS_FILESYSTEM 6 #define SECCLASS_FILE 7 #define SECCLASS_DIR 8 #define SECCLASS_FD 9 #define SECCLASS_LNK_FILE 10 #define SECCLASS_CHR_FILE 11 #define SECCLASS_BLK_FILE 12 #define SECCLASS_SOCK_FILE 13 #define SECCLASS_FIFO_FILE 14 #define SECCLASS_SOCKET 15 #define SECCLASS_TCP_SOCKET 16 #define SECCLASS_UDP_SOCKET 17 #define SECCLASS_RAWIP_SOCKET 18 #define SECCLASS_NODE 19 #define SECCLASS_NETIF 20 #define SECCLASS_NETLINK_SOCKET 21 #define SECCLASS_PACKET_SOCKET 22 #define SECCLASS_KEY_SOCKET 23 #define SECCLASS_UNIX_STREAM_SOCKET 24 #define SECCLASS_UNIX_DGRAM_SOCKET 25 #define SECCLASS_SEM 26 #define SECCLASS_MSG 27 #define SECCLASS_MSGQ 28 #define SECCLASS_SHM 29 #define SECCLASS_IPC 30 #define SECCLASS_NETLINK_ROUTE_SOCKET 31 #define SECCLASS_NETLINK_TCPDIAG_SOCKET 32 #define SECCLASS_NETLINK_NFLOG_SOCKET 33 #define SECCLASS_NETLINK_XFRM_SOCKET 34 #define SECCLASS_NETLINK_SELINUX_SOCKET 35 #define SECCLASS_NETLINK_ISCSI_SOCKET 36 #define SECCLASS_NETLINK_AUDIT_SOCKET 37 #define SECCLASS_NETLINK_FIB_LOOKUP_SOCKET 38 #define SECCLASS_NETLINK_CONNECTOR_SOCKET 39 #define SECCLASS_NETLINK_NETFILTER_SOCKET 40 #define SECCLASS_NETLINK_DNRT_SOCKET 41 #define SECCLASS_ASSOCIATION 42 #define SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET 43 #define SECCLASS_NETLINK_GENERIC_SOCKET 44 #define SECCLASS_NETLINK_SCSITRANSPORT_SOCKET 45 #define SECCLASS_NETLINK_RDMA_SOCKET 46 #define SECCLASS_NETLINK_CRYPTO_SOCKET 47 #define SECCLASS_APPLETALK_SOCKET 48 #define SECCLASS_PACKET 49 #define SECCLASS_KEY 50 #define SECCLASS_DCCP_SOCKET 51 #define SECCLASS_MEMPROTECT 52 #define SECCLASS_PEER 53 #define SECCLASS_CAPABILITY2 54 #define SECCLASS_KERNEL_SERVICE 55 #define SECCLASS_TUN_SOCKET 56 #define SECCLASS_BINDER 57 #define SECCLASS_CAP_USERNS 58 #define SECCLASS_CAP2_USERNS 59 #define SECCLASS_SCTP_SOCKET 60 #define SECCLASS_ICMP_SOCKET 61 #define SECCLASS_AX25_SOCKET 62 #define SECCLASS_IPX_SOCKET 63 #define SECCLASS_NETROM_SOCKET 64 #define SECCLASS_ATMPVC_SOCKET 65 #define SECCLASS_X25_SOCKET 66 #define SECCLASS_ROSE_SOCKET 67 #define SECCLASS_DECNET_SOCKET 68 #define SECCLASS_ATMSVC_SOCKET 69 #define SECCLASS_RDS_SOCKET 70 #define SECCLASS_IRDA_SOCKET 71 #define SECCLASS_PPPOX_SOCKET 72 #define SECCLASS_LLC_SOCKET 73 #define SECCLASS_CAN_SOCKET 74 #define SECCLASS_TIPC_SOCKET 75 #define SECCLASS_BLUETOOTH_SOCKET 76 #define SECCLASS_IUCV_SOCKET 77 #define SECCLASS_RXRPC_SOCKET 78 #define SECCLASS_ISDN_SOCKET 79 #define SECCLASS_PHONET_SOCKET 80 #define SECCLASS_IEEE802154_SOCKET 81 #define SECCLASS_CAIF_SOCKET 82 #define SECCLASS_ALG_SOCKET 83 #define SECCLASS_NFC_SOCKET 84 #define SECCLASS_VSOCK_SOCKET 85 #define SECCLASS_KCM_SOCKET 86 #define SECCLASS_QIPCRTR_SOCKET 87 #define SECCLASS_SMC_SOCKET 88 #define SECCLASS_INFINIBAND_PKEY 89 #define SECCLASS_INFINIBAND_ENDPORT 90 #define SECCLASS_BPF 91 #define SECCLASS_XDP_SOCKET 92 #define SECCLASS_PERF_EVENT 93 #define SECCLASS_LOCKDOWN 94 #define SECINITSID_KERNEL 1 #define SECINITSID_SECURITY 2 #define SECINITSID_UNLABELED 3 #define SECINITSID_FILE 5 #define SECINITSID_ANY_SOCKET 8 #define SECINITSID_PORT 9 #define SECINITSID_NETIF 10 #define SECINITSID_NETMSG 11 #define SECINITSID_NODE 12 #define SECINITSID_DEVNULL 27 #define SECINITSID_NUM 27 static inline bool security_is_socket_class(u16 kern_tclass) { bool sock = false; switch (kern_tclass) { case SECCLASS_SOCKET: case SECCLASS_TCP_SOCKET: case SECCLASS_UDP_SOCKET: case SECCLASS_RAWIP_SOCKET: case SECCLASS_NETLINK_SOCKET: case SECCLASS_PACKET_SOCKET: case SECCLASS_KEY_SOCKET: case SECCLASS_UNIX_STREAM_SOCKET: case SECCLASS_UNIX_DGRAM_SOCKET: case SECCLASS_NETLINK_ROUTE_SOCKET: case SECCLASS_NETLINK_TCPDIAG_SOCKET: case SECCLASS_NETLINK_NFLOG_SOCKET: case SECCLASS_NETLINK_XFRM_SOCKET: case SECCLASS_NETLINK_SELINUX_SOCKET: case SECCLASS_NETLINK_ISCSI_SOCKET: case SECCLASS_NETLINK_AUDIT_SOCKET: case SECCLASS_NETLINK_FIB_LOOKUP_SOCKET: case SECCLASS_NETLINK_CONNECTOR_SOCKET: case SECCLASS_NETLINK_NETFILTER_SOCKET: case SECCLASS_NETLINK_DNRT_SOCKET: case SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET: case SECCLASS_NETLINK_GENERIC_SOCKET: case SECCLASS_NETLINK_SCSITRANSPORT_SOCKET: case SECCLASS_NETLINK_RDMA_SOCKET: case SECCLASS_NETLINK_CRYPTO_SOCKET: case SECCLASS_APPLETALK_SOCKET: case SECCLASS_DCCP_SOCKET: case SECCLASS_TUN_SOCKET: case SECCLASS_SCTP_SOCKET: case SECCLASS_ICMP_SOCKET: case SECCLASS_AX25_SOCKET: case SECCLASS_IPX_SOCKET: case SECCLASS_NETROM_SOCKET: case SECCLASS_ATMPVC_SOCKET: case SECCLASS_X25_SOCKET: case SECCLASS_ROSE_SOCKET: case SECCLASS_DECNET_SOCKET: case SECCLASS_ATMSVC_SOCKET: case SECCLASS_RDS_SOCKET: case SECCLASS_IRDA_SOCKET: case SECCLASS_PPPOX_SOCKET: case SECCLASS_LLC_SOCKET: case SECCLASS_CAN_SOCKET: case SECCLASS_TIPC_SOCKET: case SECCLASS_BLUETOOTH_SOCKET: case SECCLASS_IUCV_SOCKET: case SECCLASS_RXRPC_SOCKET: case SECCLASS_ISDN_SOCKET: case SECCLASS_PHONET_SOCKET: case SECCLASS_IEEE802154_SOCKET: case SECCLASS_CAIF_SOCKET: case SECCLASS_ALG_SOCKET: case SECCLASS_NFC_SOCKET: case SECCLASS_VSOCK_SOCKET: case SECCLASS_KCM_SOCKET: case SECCLASS_QIPCRTR_SOCKET: case SECCLASS_SMC_SOCKET: case SECCLASS_XDP_SOCKET: sock = true; break; default: break; } return sock; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 // SPDX-License-Identifier: GPL-2.0 #ifndef _LINUX_KERNEL_TRACE_H #define _LINUX_KERNEL_TRACE_H #include <linux/fs.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/trace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/glob.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/ctype.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ #include <asm/syscall.h> /* some archs define it here */ #endif enum trace_type { __TRACE_FIRST_TYPE = 0, TRACE_FN, TRACE_CTX, TRACE_WAKE, TRACE_STACK, TRACE_PRINT, TRACE_BPRINT, TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; #undef __field #define __field(type, item) type item; #undef __field_fn #define __field_fn(type, item) type item; #undef __field_struct #define __field_struct(type, item) __field(type, item) #undef __field_desc #define __field_desc(type, container, item) #undef __field_packed #define __field_packed(type, container, item) #undef __array #define __array(type, item, size) type item[size]; #undef __array_desc #define __array_desc(type, container, item, size) #undef __dynamic_array #define __dynamic_array(type, item) type item[]; #undef F_STRUCT #define F_STRUCT(args...) args #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" /* Use this for memory failure errors */ #define MEM_FAIL(condition, fmt, ...) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ __warned = true; \ pr_err("ERROR: " fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_warn_once); \ }) /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h */ struct syscall_trace_enter { struct trace_entry ent; int nr; unsigned long args[]; }; struct syscall_trace_exit { struct trace_entry ent; int nr; long ret; }; struct kprobe_trace_entry_head { struct trace_entry ent; unsigned long ip; }; struct kretprobe_trace_entry_head { struct trace_entry ent; unsigned long func; unsigned long ret_ip; }; /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_IRQS_NOSUPPORT = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, }; #define TRACE_BUF_SIZE 1024 struct trace_array; /* * The CPU trace array - it consists of thousands of trace entries * plus some other descriptor data: (for example which task started * the trace, etc.) */ struct trace_array_cpu { atomic_t disabled; void *buffer_page; /* ring buffer spare */ unsigned long entries; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; unsigned long critical_sequence; unsigned long nice; unsigned long policy; unsigned long rt_priority; unsigned long skipped_entries; u64 preempt_timestamp; pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; #ifdef CONFIG_FUNCTION_TRACER int ftrace_ignore_pid; #endif bool ignore_pid; }; struct tracer; struct trace_option_dentry; struct array_buffer { struct trace_array *tr; struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; }; #define TRACE_FLAGS_MAX_SIZE 32 struct trace_options { struct tracer *tracer; struct trace_option_dentry *topts; }; struct trace_pid_list { int pid_max; unsigned long *pids; }; enum { TRACE_PIDS = BIT(0), TRACE_NO_PIDS = BIT(1), }; static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* Return true if the pid list in type has pids */ return ((type & TRACE_PIDS) && pid_list) || ((type & TRACE_NO_PIDS) && no_pid_list); } static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* * Turning off what is in @type, return true if the "other" * pid list, still has pids in it. */ return (!(type & TRACE_PIDS) && pid_list) || (!(type & TRACE_NO_PIDS) && no_pid_list); } typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); /** * struct cond_snapshot - conditional snapshot data and callback * * The cond_snapshot structure encapsulates a callback function and * data associated with the snapshot for a given tracing instance. * * When a snapshot is taken conditionally, by invoking * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is * passed in turn to the cond_snapshot.update() function. That data * can be compared by the update() implementation with the cond_data * contained within the struct cond_snapshot instance associated with * the trace_array. Because the tr->max_lock is held throughout the * update() call, the update() function can directly retrieve the * cond_snapshot and cond_data associated with the per-instance * snapshot associated with the trace_array. * * The cond_snapshot.update() implementation can save data to be * associated with the snapshot if it decides to, and returns 'true' * in that case, or it returns 'false' if the conditional snapshot * shouldn't be taken. * * The cond_snapshot instance is created and associated with the * user-defined cond_data by tracing_cond_snapshot_enable(). * Likewise, the cond_snapshot instance is destroyed and is no longer * associated with the trace instance by * tracing_cond_snapshot_disable(). * * The method below is required. * * @update: When a conditional snapshot is invoked, the update() * callback function is invoked with the tr->max_lock held. The * update() implementation signals whether or not to actually * take the snapshot, by returning 'true' if so, 'false' if no * snapshot should be taken. Because the max_lock is held for * the duration of update(), the implementation is safe to * directly retrieved and save any implementation data it needs * to in association with the snapshot. */ struct cond_snapshot { void *cond_data; cond_update_fn_t update; }; /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. * They have on/off state as well: */ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped * with the buffer of the array_buffer and the buffers are reset for * the array_buffer so the tracing can continue. */ struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; #endif #endif struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; /* * max_lock is used to protect the swapping of buffers * when taking a max snapshot. The buffers themselves are * protected by per_cpu spinlocks. But the action of the swap * needs its own lock. * * This is defined as a arch_spinlock_t in order to help * with performance when lockdep debugging is enabled. * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; int nr_topts; bool clear_trace; int buffer_percent; unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; int trace_ref; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; struct list_head mod_trace; struct list_head mod_notrace; #endif /* function tracing enabled */ int function_enabled; #endif int time_stamp_abs_ref; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif }; enum { TRACE_ARRAY_FL_GLOBAL = (1 << 0) }; extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. */ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; if (list_empty(&ftrace_trace_arrays)) return NULL; tr = list_entry(ftrace_trace_arrays.prev, typeof(*tr), list); WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); return tr; } #define FTRACE_CMP_TYPE(var, type) \ __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN #define IF_ASSIGN(var, entry, etype, id) \ if (FTRACE_CMP_TYPE(var, etype)) { \ var = (typeof(var))(entry); \ WARN_ON(id != 0 && (entry)->type != id); \ break; \ } /* Will cause compile errors if type is not found. */ extern void __ftrace_bad_type(void); /* * The trace_assign_type is a verifier that the entry type is * the same as the type being assigned. To add new types simply * add a line with the following format: * * IF_ASSIGN(var, ent, type, id); * * Where "type" is the trace type that includes the trace_entry * as the "ent" item. And "id" is the trace identifier that is * used in the trace_type enum. * * If the type can have more than one id, then use zero. */ #define trace_assign_type(var, ent) \ do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ __ftrace_bad_type(); \ } while (0) /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the * flags value in struct tracer_flags. */ struct tracer_opt { const char *name; /* Will appear on the trace_options file */ u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* * The set of specific options for a tracer. Your tracer * have to set the initial value of the flags val. */ struct tracer_flags { u32 val; struct tracer_opt *opts; struct tracer *trace; }; /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b struct trace_option_dentry { struct tracer_opt *opt; struct tracer_flags *flags; struct trace_array *tr; struct dentry *entry; }; /** * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer * @start: called when tracing is unpaused (echo 1 > tracing_on) * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened * @close: called when the trace file is released * @pipe_close: called when the trace_pipe file is released * @read: override the default read callback on trace_pipe * @splice_read: override the default splice_read callback on trace_pipe * @selftest: selftest to run on boot (see trace_selftest.c) * @print_headers: override the first lines that describe your columns * @print_line: callback that prints a trace * @set_flag: signals one of your private flags changed (trace_options file) * @flags: your private flags */ struct tracer { const char *name; int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); int (*update_thresh)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t (*splice_read)(struct trace_iterator *iter, struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(struct trace_array *tr, u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; int enabled; bool print_max; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif /* True if tracer cannot be enabled in kernel param */ bool noboot; }; /* Only current can touch trace_recursion */ /* * For function tracing recursion: * The order of these bits are important. * * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... * If callback is registered to the "global" list, the list * function is called and recursion checks the GLOBAL bits. * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. */ enum { /* Function recursion bits */ TRACE_FTRACE_BIT, TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, TRACE_FTRACE_TRANSITION_BIT, /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function * graph in irq because we want to trace a particular function that * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ TRACE_IRQ_BIT, /* Set if the function is in the set_graph_function file */ TRACE_GRAPH_BIT, /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace * the function in that interrupt, the depth can be greater * than zero, because of the preempted start of a previous * trace. In an even more unlikely case, depth could be 2 * if a softirq interrupted the start of graph tracing, * followed by an interrupt preempting a start of graph * tracing in the softirq, and depth can even be 3 * if an NMI came in at the start of an interrupt function * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask * of what the depth is when we set TRACE_GRAPH_BIT */ TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) #define trace_recursion_depth() \ (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) #define trace_recursion_set_depth(depth) \ do { \ current->trace_recursion &= \ ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ current->trace_recursion |= \ ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ } while (0) #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) enum { TRACE_CTX_NMI, TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) { int bit; if (in_interrupt()) { if (in_nmi()) bit = TRACE_CTX_NMI; else if (in_irq()) bit = TRACE_CTX_IRQ; else bit = TRACE_CTX_SOFTIRQ; } else bit = TRACE_CTX_NORMAL; return bit; } static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); return bit; } val |= 1 << bit; current->trace_recursion = val; barrier(); return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; bit = 1 << bit; val &= ~bit; barrier(); current->trace_recursion = val; } static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL; } int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); int tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); void tracing_start_tgid_record(void); void tracing_stop_tgid_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); loff_t tracing_lseek(struct file *file, loff_t offset, int whence); extern cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; /* PID filtering */ extern int pid_max; bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task); void trace_filter_add_remove_task(struct trace_pid_list *pid_list, struct task_struct *self, struct task_struct *task); void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); #ifdef CONFIG_TRACER_MAX_TRACE void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); #endif /* CONFIG_TRACER_MAX_TRACE */ #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ defined(CONFIG_FSNOTIFY) void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else static inline void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { } #endif /* CONFIG_STACKTRACE */ extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } #endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST extern void __init disable_tracing_selftest(const char *reason); extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. Thus, when selftests * are enabled, then the tracers need to reference __init functions. */ #define __tracer_data __refdata #else static inline void __init disable_tracing_selftest(const char *reason) { } /* Tracers are seldom changed. Optimize when selftests are disabled. */ #define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); struct ftrace_hash; struct ftrace_mod_load { struct list_head list; char *func; char *module; int enable; }; enum { FTRACE_HASH_FL_MOD = (1 << 0), }; struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; unsigned long count; unsigned long flags; struct rcu_head rcu; }; struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) { return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD)); } /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Flag options */ #define TRACE_GRAPH_PRINT_OVERRUN 0x1 #define TRACE_GRAPH_PRINT_CPU 0x2 #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_REL_TIME 0x40 #define TRACE_GRAPH_PRINT_IRQS 0x80 #define TRACE_GRAPH_PRINT_TAIL 0x100 #define TRACE_GRAPH_SLEEP_TIME 0x200 #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) extern void ftrace_graph_sleep_time_control(bool enable); #ifdef CONFIG_FUNCTION_PROFILER extern void ftrace_graph_graph_time_control(bool enable); #else static inline void ftrace_graph_graph_time_control(bool enable) { } #endif extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; struct ftrace_hash *hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { ret = 1; goto out; } if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions * when the depth is zero. */ trace_recursion_set(TRACE_GRAPH_BIT); trace_recursion_set_depth(trace->depth); /* * If no irqs are to be traced, but a set_graph_function * is set, and called by an interrupt handler, we still * want to trace it. */ if (in_irq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); ret = 1; } out: preempt_enable_notrace(); return ret; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { if (trace_recursion_test(TRACE_GRAPH_BIT) && trace->depth == trace_recursion_depth()) trace_recursion_clear(TRACE_GRAPH_BIT); } static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; struct ftrace_hash *notrace_hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); return ret; } #else static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { return 1; } static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) { /* trace it when it is-nested-in or is a function enabled. */ return !(trace_recursion_test(TRACE_GRAPH_BIT) || ftrace_graph_addr(trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER #define FTRACE_PID_IGNORE -1 #define FTRACE_PID_TRACE -2 struct ftrace_func_command { struct list_head list; char *name; int (*func)(struct trace_array *tr, struct ftrace_hash *hash, char *func, char *cmd, char *params, int enable); }; extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) != FTRACE_PID_IGNORE; } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); int ftrace_allocate_ftrace_ops(struct trace_array *tr); void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d_tracer); void ftrace_clear_pids(struct trace_array *tr); int init_function_trace(void); void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else static inline int ftrace_trace_task(struct trace_array *tr) { return 1; } static inline int ftrace_is_dead(void) { return 0; } static inline int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { return 0; } static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr) { return 0; } static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } static inline __init void ftrace_init_global_array_ops(struct trace_array *tr) { } static inline void ftrace_reset_array_ops(struct trace_array *tr) { } static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_clear_pids(struct trace_array *tr) { } static inline int init_function_trace(void) { return 0; } static inline void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) { } /* ftace_func_t type is not defined, use macro instead of static inline */ #define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); int (*init)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data); void (*free)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data); int (*print)(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data); }; struct ftrace_func_mapper; typedef int (*ftrace_mapper_func)(void *data); struct ftrace_func_mapper *allocate_ftrace_func_mapper(void); void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, unsigned long ip); int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data); void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, unsigned long ip); void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ftrace_mapper_func free_func); extern int register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); extern int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops); extern void clear_ftrace_function_probes(struct trace_array *tr); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); #else struct ftrace_func_command; static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; } static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } static inline void clear_ftrace_function_probes(struct trace_array *tr) { } /* * The ops parameter passed in is usually undefined. * This must be a macro. */ #define ftrace_create_filter_files(ops, parent) do { } while (0) #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ bool ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found * @buffer: holds the parsed user input * @idx: user input length * @size: buffer size */ struct trace_parser { bool cont; char *buffer; unsigned idx; unsigned size; }; static inline bool trace_parser_loaded(struct trace_parser *parser) { return (parser->idx != 0); } static inline bool trace_parser_cont(struct trace_parser *parser) { return parser->cont; } static inline void trace_parser_clear(struct trace_parser *parser) { parser->cont = false; parser->idx = 0; } extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos); /* * Only create function graph options if function graph is configured. */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER # define FGRAPH_FLAGS \ C(DISPLAY_GRAPH, "display-graph"), #else # define FGRAPH_FLAGS #endif #ifdef CONFIG_BRANCH_TRACER # define BRANCH_FLAGS \ C(BRANCH, "branch"), #else # define BRANCH_FLAGS #endif #ifdef CONFIG_FUNCTION_TRACER # define FUNCTION_FLAGS \ C(FUNCTION, "function-trace"), \ C(FUNC_FORK, "function-fork"), # define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL # define TRACE_ITER_FUNC_FORK 0UL #endif #ifdef CONFIG_STACKTRACE # define STACK_FLAGS \ C(STACKTRACE, "stacktrace"), #else # define STACK_FLAGS #endif /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * * NOTE: These bits must match the trace_options array in * trace.c (this macro guarantees it). */ #define TRACE_FLAGS \ C(PRINT_PARENT, "print-parent"), \ C(SYM_OFFSET, "sym-offset"), \ C(SYM_ADDR, "sym-addr"), \ C(VERBOSE, "verbose"), \ C(RAW, "raw"), \ C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ C(SYM_USEROBJ, "sym-userobj"), \ C(PRINTK_MSGONLY, "printk-msg-only"), \ C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ C(LATENCY_FMT, "latency-format"), \ C(RECORD_CMD, "record-cmd"), \ C(RECORD_TGID, "record-tgid"), \ C(OVERWRITE, "overwrite"), \ C(STOP_ON_FREE, "disable_on_free"), \ C(IRQ_INFO, "irq-info"), \ C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ BRANCH_FLAGS /* * By defining C, we can make TRACE_FLAGS a list of bit names * that will define the bits for the flag masks. */ #undef C #define C(a, b) TRACE_ITER_##a##_BIT enum trace_iterator_bits { TRACE_FLAGS /* Make sure we don't go more than we have bits for */ TRACE_ITER_LAST_BIT }; /* * By redefining C, we can make TRACE_FLAGS a list of masks that * use the bits as defined above. */ #undef C #define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) enum trace_iterator_flags { TRACE_FLAGS }; /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) extern struct tracer nop_trace; #ifdef CONFIG_BRANCH_TRACER extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { if (tr->trace_flags & TRACE_ITER_BRANCH) return enable_branch_tracing(tr); return 0; } static inline void trace_branch_disable(void) { /* due to races, always disable */ disable_branch_tracing(); } #else static inline int trace_branch_enable(struct trace_array *tr) { return 0; } static inline void trace_branch_disable(void) { } #endif /* CONFIG_BRANCH_TRACER */ /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); struct ftrace_event_field { struct list_head link; const char *name; const char *type; int filter_type; int offset; int size; int is_signed; }; struct prog_entry; struct event_filter { struct prog_entry __rcu *prog; char *filter_string; }; struct event_subsystem { struct list_head list; const char *name; struct event_filter *filter; int ref_count; }; struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; struct dentry *entry; int ref_count; int nr_events; }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { /* Simply release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); return; } ring_buffer_discard_commit(buffer, event); } /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires * filtering against its fields, then they will be called as the * entry already holds the field information of the current event. * * It also checks if the event should be discarded or not. * It is to be discarded if the event is soft disabled and the * event was only recorded to process triggers, or if the event * filter is active and this event did not match the filters. * * Returns true if the event is discarded, false otherwise. */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) { unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && !filter_match_preds(file->filter, entry))) { __trace_event_discard_commit(buffer, event); return true; } return false; } /** * event_trigger_unlock_commit - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); } /** * event_trigger_unlock_commit_regs - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. * * Same as event_trigger_unlock_commit() but calls * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) event_triggers_post_call(file, tt); } #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) /* * The max preds is the size of unsigned short with * two flags at the MSBs. One bit is used for both the IS_RIGHT * and FOLD flags. The other is reserved. * * 2^14 preds is way more than enough. */ #define MAX_FILTER_PRED 16384 struct filter_pred; struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); typedef int (*regex_match_func)(char *str, struct regex *r, int len); enum regex_type { MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, MATCH_GLOB, MATCH_INDEX, }; struct regex { char pattern[MAX_FILTER_STR_VAL]; int len; int field_len; regex_match_func match; }; struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; unsigned short *ops; struct ftrace_event_field *field; int offset; int not; int op; }; static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; } static inline bool is_function_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_TRACE_FN; } extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); extern int apply_event_filter(struct trace_event_file *file, char *filter_string); extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); extern int create_event_filter(struct trace_array *tr, struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); extern struct trace_event_file *__find_event_file(struct trace_array *tr, const char *system, const char *event); extern struct trace_event_file *find_event_file(struct trace_array *tr, const char *system, const char *event); static inline void *event_file_data(struct file *filp) { return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); extern int register_trigger_hist_enable_disable_cmds(void); #else static inline int register_trigger_hist_cmd(void) { return 0; } static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; } #endif extern int register_trigger_cmds(void); extern void clear_event_triggers(struct trace_array *tr); struct event_trigger_data { unsigned long count; int ref; struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; void *private_data; bool paused; bool paused_tmp; struct list_head list; char *name; struct list_head named_list; struct event_trigger_data *named_data; }; /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" #define ENABLE_HIST_STR "enable_hist" #define DISABLE_HIST_STR "disable_hist" struct enable_trigger_data { struct trace_event_file *file; bool enable; bool hist; }; extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int event_enable_trigger_func(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable); extern void update_cond_flag(struct trace_event_file *file); extern int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, struct trace_event_file *file); extern struct event_trigger_data *find_named_trigger(const char *name); extern bool is_named_trigger(struct event_trigger_data *test); extern int save_named_trigger(const char *name, struct event_trigger_data *data); extern void del_named_trigger(struct event_trigger_data *data); extern void pause_named_trigger(struct event_trigger_data *data); extern void unpause_named_trigger(struct event_trigger_data *data); extern void set_named_trigger_data(struct event_trigger_data *data, struct event_trigger_data *named_data); extern struct event_trigger_data * get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); /** * struct event_trigger_ops - callbacks for trace event triggers * * The methods in this structure provide per-event trigger hooks for * various trigger operations. * * All the methods below, except for @init() and @free(), must be * implemented. * * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() * function). This can be used to perform per-trigger * initialization such as incrementing a per-trigger reference * count, for instance. This is usually implemented by the * generic utility function @event_trigger_init() (see * trace_event_triggers.c). * * @free: An optional de-initialization function called for the * trigger when the trigger is unregistered (via the * event_command @reg() function). This can be used to perform * per-trigger de-initialization such as decrementing a * per-trigger reference count and freeing corresponding trigger * data, for instance. This is usually implemented by the * generic utility function @event_trigger_free() (see * trace_event_triggers.c). * * @print: The callback function invoked to have the trigger print * itself. This is usually implemented by a wrapper function * that calls the generic utility function @event_trigger_print() * (see trace_event_triggers.c). */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, void *rec, struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, struct event_trigger_data *data); int (*print)(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); }; /** * struct event_command - callbacks and data members for event commands * * Event commands are invoked by users by writing the command name * into the 'trigger' file associated with a trace event. The * parameters associated with a specific invocation of an event * command are used to create an event trigger instance, which is * added to the list of trigger instances associated with that trace * event. When the event is hit, the set of triggers associated with * that event is invoked. * * The data members in this structure provide per-event command data * for various event commands. * * All the data members below, except for @post_trigger, must be set * for each event command. * * @name: The unique name that identifies the event command. This is * the name used when setting triggers via trigger files. * * @trigger_type: A unique id that identifies the event command * 'type'. This value has two purposes, the first to ensure that * only one trigger of the same type can be set at a given time * for a particular event e.g. it doesn't make sense to have both * a traceon and traceoff trigger attached to a single event at * the same time, so traceon and traceoff have the same type * though they have different names. The @trigger_type value is * also used as a bit value for deferring the actual trigger * action until after the current event is finished. Some * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type * enum in include/linux/trace_events.h. * * @flags: See the enum event_command_flags below. * * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * * @func: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other * event_command callback functions to orchestrate this, and is * usually implemented by the generic utility function * @event_trigger_callback() (see trace_event_triggers.c). * * @reg: Adds the trigger to the list of triggers associated with the * event, and enables the event trigger itself, after * initializing it (via the event_trigger_ops @init() function). * This is also where commands can use the @trigger_type value to * make the decision as to whether or not multiple instances of * the trigger should be allowed. This is usually implemented by * the generic utility function @register_trigger() (see * trace_event_triggers.c). * * @unreg: Removes the trigger from the list of triggers associated * with the event, and disables the event trigger itself, after * initializing it (via the event_trigger_ops @free() function). * This is usually implemented by the generic utility function * @unregister_trigger() (see trace_event_triggers.c). * * @unreg_all: An optional function called to remove all the triggers * from the list of triggers associated with the event. Called * when a trigger file is opened in truncate mode. * * @set_filter: An optional function called to parse and set a filter * for the trigger. If no @set_filter() method is set for the * event command, filters set by the user for the command will be * ignored. This is usually implemented by the generic utility * function @set_trigger_filter() (see trace_event_triggers.c). * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; int (*func)(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** * enum event_command_flags - flags for struct event_command * * @POST_TRIGGER: A flag that says whether or not this command needs * to have its action delayed until after the current event has * been closed. Some triggers need to avoid being invoked while * an event is currently in the process of being logged, since * the trigger may itself log data into the trace buffer. Thus * we make sure the current event is committed before invoking * those triggers. To do that, the trigger invocation is split * in two - the first part checks the filter using the current * trace record; if a command has the @post_trigger flag set, it * sets a bit for itself in the return value, otherwise it * directly invokes the trigger. Once all commands have been * either invoked or set their return flag, the current record is * either committed or discarded. At that point, if any commands * have deferred their triggers, those commands are finally * invoked following the close of the current event. In other * words, if the event_trigger_ops @func() probe implementation * itself logs to the trace buffer, this flag should be set, * otherwise it can be left unspecified. * * @NEEDS_REC: A flag that says whether or not this command needs * access to the trace record in order to perform its function, * regardless of whether or not it has a filter associated with * it (filters make a trigger require access to the trace record * but are not always present). */ enum event_command_flags { EVENT_CMD_FL_POST_TRIGGER = 1, EVENT_CMD_FL_NEEDS_REC = 2, }; static inline bool event_command_post_trigger(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER; } static inline bool event_command_needs_rec(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC; } extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data); extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update); extern int tracing_snapshot_cond_disable(struct trace_array *tr); extern void *tracing_cond_snapshot_data(struct trace_array *tr); extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; extern const char *__start___tracepoint_str[]; extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); /* Used from boot time tracer */ extern int trace_set_options(struct trace_array *tr, char *option); extern int tracing_set_tracer(struct trace_array *tr, const char *buf); extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); extern int tracing_set_cpumask(struct trace_array *tr, cpumask_var_t tracing_cpumask_new); #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats * into sections to display. But the trace infrastructure wants * to use these without the added overhead at the price of being * a bit slower (used mainly for warnings, where we don't care * about performance). The internal_trace_puts() is for such * a purpose. */ #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif #ifdef CONFIG_FTRACE_SYSCALLS void init_ftrace_syscalls(void); const char *get_syscall_name(int syscall); #else static inline void init_ftrace_syscalls(void) { } static inline const char *get_syscall_name(int syscall) { return NULL; } #endif #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT void tracing_snapshot_instance(struct trace_array *tr); int tracing_alloc_snapshot_instance(struct trace_array *tr); #else static inline void tracing_snapshot_instance(struct trace_array *tr) { } static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) { return 0; } #endif #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1); void tracer_preempt_off(unsigned long a0, unsigned long a1); #else static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_IRQSOFF_TRACER void tracer_hardirqs_on(unsigned long a0, unsigned long a1); void tracer_hardirqs_off(unsigned long a0, unsigned long a1); #else static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif extern struct trace_iterator *tracepoint_print_iter; /* * Reset the state of the trace_iterator so that it can read consumed data. * Normally, the trace_iterator is used for reading the data when it is not * consumed, and must retain state. */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { const size_t offset = offsetof(struct trace_iterator, seq); /* * Keep gcc from complaining about overwriting more than just one * member in the structure. */ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); iter->pos = -1; } /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') return false; while (*++name != '\0') { if (!isalpha(*name) && !isdigit(*name) && *name != '_') return false; } return true; } #endif /* _LINUX_KERNEL_TRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 // SPDX-License-Identifier: GPL-2.0 /* * fs/ext4/mballoc.h * * Written by: Alex Tomas <alex@clusterfs.com> * */ #ifndef _EXT4_MBALLOC_H #define _EXT4_MBALLOC_H #include <linux/time.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/module.h> #include <linux/swap.h> #include <linux/proc_fs.h> #include <linux/pagemap.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include "ext4_jbd2.h" #include "ext4.h" /* * mb_debug() dynamic printk msgs could be used to debug mballoc code. */ #ifdef CONFIG_EXT4_DEBUG #define mb_debug(sb, fmt, ...) \ pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt, \ current->comm, task_pid_nr(current), sb->s_id, \ __FILE__, __LINE__, __func__, ##__VA_ARGS__) #else #define mb_debug(sb, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ /* * How long mballoc can look for a best extent (in found extents) */ #define MB_DEFAULT_MAX_TO_SCAN 200 /* * How long mballoc must look for a best extent */ #define MB_DEFAULT_MIN_TO_SCAN 10 /* * with 'ext4_mb_stats' allocator will collect stats that will be * shown at umount. The collecting costs though! */ #define MB_DEFAULT_STATS 0 /* * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served * by the stream allocator, which purpose is to pack requests * as close each to other as possible to produce smooth I/O traffic * We use locality group prealloc space for stream request. * We can tune the same via /proc/fs/ext4/<parition>/stream_req */ #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ /* * for which requests use 2^N search using buddies */ #define MB_DEFAULT_ORDER2_REQS 2 /* * default group prealloc size 512 blocks */ #define MB_DEFAULT_GROUP_PREALLOC 512 /* * maximum length of inode prealloc list */ #define MB_DEFAULT_MAX_INODE_PREALLOC 512 struct ext4_free_data { /* this links the free block information from sb_info */ struct list_head efd_list; /* this links the free block information from group_info */ struct rb_node efd_node; /* group which free block extent belongs */ ext4_group_t efd_group; /* free block extent */ ext4_grpblk_t efd_start_cluster; ext4_grpblk_t efd_count; /* transaction which freed this extent */ tid_t efd_tid; }; struct ext4_prealloc_space { struct list_head pa_inode_list; struct list_head pa_group_list; union { struct list_head pa_tmp_list; struct rcu_head pa_rcu; } u; spinlock_t pa_lock; atomic_t pa_count; unsigned pa_deleted; ext4_fsblk_t pa_pstart; /* phys. block */ ext4_lblk_t pa_lstart; /* log. block */ ext4_grpblk_t pa_len; /* len of preallocated chunk */ ext4_grpblk_t pa_free; /* how many blocks are free */ unsigned short pa_type; /* pa type. inode or group */ spinlock_t *pa_obj_lock; struct inode *pa_inode; /* hack, for history only */ }; enum { MB_INODE_PA = 0, MB_GROUP_PA = 1 }; struct ext4_free_extent { ext4_lblk_t fe_logical; ext4_grpblk_t fe_start; /* In cluster units */ ext4_group_t fe_group; ext4_grpblk_t fe_len; /* In cluster units */ }; /* * Locality group: * we try to group all related changes together * so that writeback can flush/allocate them together as well * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC * (512). We store prealloc space into the hash based on the pa_free blocks * order value.ie, fls(pa_free)-1; */ #define PREALLOC_TB_SIZE 10 struct ext4_locality_group { /* for allocator */ /* to serialize allocates */ struct mutex lg_mutex; /* list of preallocations */ struct list_head lg_prealloc_list[PREALLOC_TB_SIZE]; spinlock_t lg_prealloc_lock; }; struct ext4_allocation_context { struct inode *ac_inode; struct super_block *ac_sb; /* original request */ struct ext4_free_extent ac_o_ex; /* goal request (normalized ac_o_ex) */ struct ext4_free_extent ac_g_ex; /* the best found extent */ struct ext4_free_extent ac_b_ex; /* copy of the best found extent taken before preallocation efforts */ struct ext4_free_extent ac_f_ex; __u16 ac_groups_scanned; __u16 ac_found; __u16 ac_tail; __u16 ac_buddy; __u16 ac_flags; /* allocation hints */ __u8 ac_status; __u8 ac_criteria; __u8 ac_2order; /* if request is to allocate 2^N blocks and * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ struct page *ac_bitmap_page; struct page *ac_buddy_page; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; }; #define AC_STATUS_CONTINUE 1 #define AC_STATUS_FOUND 2 #define AC_STATUS_BREAK 3 struct ext4_buddy { struct page *bd_buddy_page; void *bd_buddy; struct page *bd_bitmap_page; void *bd_bitmap; struct ext4_group_info *bd_info; struct super_block *bd_sb; __u16 bd_blkbits; ext4_group_t bd_group; }; static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) { return ext4_group_first_block_no(sb, fex->fe_group) + (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); } typedef int (*ext4_mballoc_query_range_fn)( struct super_block *sb, ext4_group_t agno, ext4_grpblk_t start, ext4_grpblk_t len, void *priv); int ext4_mballoc_query_range( struct super_block *sb, ext4_group_t agno, ext4_grpblk_t start, ext4_grpblk_t end, ext4_mballoc_query_range_fn formatter, void *priv); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 /* SPDX-License-Identifier: GPL-2.0 */ /* * Security server interface. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> * */ #ifndef _SELINUX_SECURITY_H_ #define _SELINUX_SECURITY_H_ #include <linux/compiler.h> #include <linux/dcache.h> #include <linux/magic.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include "flask.h" #include "policycap.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ #define SECSID_WILD 0xffffffff /* wildcard SID */ #define SECCLASS_NULL 0x0000 /* no class */ /* Identify specific policy version changes */ #define POLICYDB_VERSION_BASE 15 #define POLICYDB_VERSION_BOOL 16 #define POLICYDB_VERSION_IPV6 17 #define POLICYDB_VERSION_NLCLASS 18 #define POLICYDB_VERSION_VALIDATETRANS 19 #define POLICYDB_VERSION_MLS 19 #define POLICYDB_VERSION_AVTAB 20 #define POLICYDB_VERSION_RANGETRANS 21 #define POLICYDB_VERSION_POLCAP 22 #define POLICYDB_VERSION_PERMISSIVE 23 #define POLICYDB_VERSION_BOUNDARY 24 #define POLICYDB_VERSION_FILENAME_TRANS 25 #define POLICYDB_VERSION_ROLETRANS 26 #define POLICYDB_VERSION_NEW_OBJECT_DEFAULTS 27 #define POLICYDB_VERSION_DEFAULT_TYPE 28 #define POLICYDB_VERSION_CONSTRAINT_NAMES 29 #define POLICYDB_VERSION_XPERMS_IOCTL 30 #define POLICYDB_VERSION_INFINIBAND 31 #define POLICYDB_VERSION_GLBLUB 32 #define POLICYDB_VERSION_COMP_FTRANS 33 /* compressed filename transitions */ /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #define POLICYDB_VERSION_MAX POLICYDB_VERSION_COMP_FTRANS /* Mask for just the mount related flags */ #define SE_MNTMASK 0x0f /* Super block security struct flags for mount options */ /* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 #define FSCONTEXT_MNT 0x02 #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 #define SBLABEL_MNT 0x10 /* Non-mount related flags */ #define SE_SBINITIALIZED 0x0100 #define SE_SBPROC 0x0200 #define SE_SBGENFS 0x0400 #define SE_SBGENFS_XATTR 0x0800 #define CONTEXT_STR "context" #define FSCONTEXT_STR "fscontext" #define ROOTCONTEXT_STR "rootcontext" #define DEFCONTEXT_STR "defcontext" #define SECLABEL_STR "seclabel" struct netlbl_lsm_secattr; extern int selinux_enabled_boot; /* * type_datum properties * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY */ #define TYPEDATUM_PROPERTY_PRIMARY 0x0001 #define TYPEDATUM_PROPERTY_ATTRIBUTE 0x0002 /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 struct selinux_avc; struct selinux_policy; struct selinux_state { #ifdef CONFIG_SECURITY_SELINUX_DISABLE bool disabled; #endif #ifdef CONFIG_SECURITY_SELINUX_DEVELOP bool enforcing; #endif bool checkreqprot; bool initialized; bool policycap[__POLICYDB_CAPABILITY_MAX]; struct page *status_page; struct mutex status_lock; struct selinux_avc *avc; struct selinux_policy __rcu *policy; struct mutex policy_mutex; } __randomize_layout; void selinux_avc_init(struct selinux_avc **avc); extern struct selinux_state selinux_state; static inline bool selinux_initialized(const struct selinux_state *state) { /* do a synchronized load to avoid race conditions */ return smp_load_acquire(&state->initialized); } static inline void selinux_mark_initialized(struct selinux_state *state) { /* do a synchronized write to avoid race conditions */ smp_store_release(&state->initialized, true); } #ifdef CONFIG_SECURITY_SELINUX_DEVELOP static inline bool enforcing_enabled(struct selinux_state *state) { return READ_ONCE(state->enforcing); } static inline void enforcing_set(struct selinux_state *state, bool value) { WRITE_ONCE(state->enforcing, value); } #else static inline bool enforcing_enabled(struct selinux_state *state) { return true; } static inline void enforcing_set(struct selinux_state *state, bool value) { } #endif static inline bool checkreqprot_get(const struct selinux_state *state) { return READ_ONCE(state->checkreqprot); } static inline void checkreqprot_set(struct selinux_state *state, bool value) { WRITE_ONCE(state->checkreqprot, value); } #ifdef CONFIG_SECURITY_SELINUX_DISABLE static inline bool selinux_disabled(struct selinux_state *state) { return READ_ONCE(state->disabled); } static inline void selinux_mark_disabled(struct selinux_state *state) { WRITE_ONCE(state->disabled, true); } #else static inline bool selinux_disabled(struct selinux_state *state) { return false; } #endif static inline bool selinux_policycap_netpeer(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NETPEER]); } static inline bool selinux_policycap_openperm(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_OPENPERM]); } static inline bool selinux_policycap_extsockclass(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]); } static inline bool selinux_policycap_alwaysnetwork(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]); } static inline bool selinux_policycap_cgroupseclabel(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]); } static inline bool selinux_policycap_nnp_nosuid_transition(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]); } static inline bool selinux_policycap_genfs_seclabel_symlinks(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } struct selinux_policy_convert_data; struct selinux_load_state { struct selinux_policy *policy; struct selinux_policy_convert_data *convert_data; }; int security_mls_enabled(struct selinux_state *state); int security_load_policy(struct selinux_state *state, void *data, size_t len, struct selinux_load_state *load_state); void selinux_policy_commit(struct selinux_state *state, struct selinux_load_state *load_state); void selinux_policy_cancel(struct selinux_state *state, struct selinux_load_state *load_state); int security_read_policy(struct selinux_state *state, void **data, size_t *len); int security_policycap_supported(struct selinux_state *state, unsigned int req_cap); #define SEL_VEC_MAX 32 struct av_decision { u32 allowed; u32 auditallow; u32 auditdeny; u32 seqno; u32 flags; }; #define XPERMS_ALLOWED 1 #define XPERMS_AUDITALLOW 2 #define XPERMS_DONTAUDIT 4 #define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f)) #define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f))) struct extended_perms_data { u32 p[8]; }; struct extended_perms_decision { u8 used; u8 driver; struct extended_perms_data *allowed; struct extended_perms_data *auditallow; struct extended_perms_data *dontaudit; }; struct extended_perms { u16 len; /* length associated decision chain */ struct extended_perms_data drivers; /* flag drivers that are used */ }; /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 void security_compute_av(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd, struct extended_perms *xperms); void security_compute_xperms_decision(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u8 driver, struct extended_perms_decision *xpermd); void security_compute_av_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd); int security_transition_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); int security_transition_sid_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid); int security_member_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int security_change_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int security_sid_to_context(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_force(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_inval(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_context_to_sid(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, gfp_t gfp); int security_context_str_to_sid(struct selinux_state *state, const char *scontext, u32 *out_sid, gfp_t gfp); int security_context_to_sid_default(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); int security_context_to_sid_force(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *sid); int security_get_user_sids(struct selinux_state *state, u32 callsid, char *username, u32 **sids, u32 *nel); int security_port_sid(struct selinux_state *state, u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(struct selinux_state *state, u64 subnet_prefix, u16 pkey_num, u32 *out_sid); int security_ib_endport_sid(struct selinux_state *state, const char *dev_name, u8 port_num, u32 *out_sid); int security_netif_sid(struct selinux_state *state, char *name, u32 *if_sid); int security_node_sid(struct selinux_state *state, u16 domain, void *addr, u32 addrlen, u32 *out_sid); int security_validate_transition(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_validate_transition_user(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_bounded_transition(struct selinux_state *state, u32 oldsid, u32 newsid); int security_sid_mls_copy(struct selinux_state *state, u32 sid, u32 mls_sid, u32 *new_sid); int security_net_peersid_resolve(struct selinux_state *state, u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid); int security_get_classes(struct selinux_policy *policy, char ***classes, int *nclasses); int security_get_permissions(struct selinux_policy *policy, char *class, char ***perms, int *nperms); int security_get_reject_unknown(struct selinux_state *state); int security_get_allow_unknown(struct selinux_state *state); #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */ #define SECURITY_FS_USE_GENFS 4 /* use the genfs support */ #define SECURITY_FS_USE_NONE 5 /* no labeling support */ #define SECURITY_FS_USE_MNTPOINT 6 /* use mountpoint labeling */ #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ int security_fs_use(struct selinux_state *state, struct super_block *sb); int security_genfs_sid(struct selinux_state *state, const char *fstype, char *name, u16 sclass, u32 *sid); int selinux_policy_genfs_sid(struct selinux_policy *policy, const char *fstype, char *name, u16 sclass, u32 *sid); #ifdef CONFIG_NETLABEL int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid); int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr); #else static inline int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid) { return -EIDRM; } static inline int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr) { return -ENOENT; } #endif /* CONFIG_NETLABEL */ const char *security_get_initial_sid_context(u32 sid); /* * status notifier using mmap interface */ extern struct page *selinux_kernel_status_page(struct selinux_state *state); #define SELINUX_KERNEL_STATUS_VERSION 1 struct selinux_kernel_status { u32 version; /* version number of thie structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ /* * The version > 0 supports above members. */ } __packed; extern void selinux_status_update_setenforce(struct selinux_state *state, int enforcing); extern void selinux_status_update_policyload(struct selinux_state *state, int seqno); extern void selinux_complete_init(void); extern int selinux_disable(struct selinux_state *state); extern void exit_sel_fs(void); extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern void avtab_cache_init(void); extern void ebitmap_cache_init(void); extern void hashtab_cache_init(void); extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); #endif /* _SELINUX_SECURITY_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_USER_H #define _LINUX_SCHED_USER_H #include <linux/uidgid.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/ratelimit.h> /* * Some day this will be a full-fledged user tracking system.. */ struct user_struct { refcount_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif #ifdef CONFIG_EPOLL atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ #endif #ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ unsigned long unix_inflight; /* How many files in flight in unix sockets */ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ /* Hash table maintenance information */ struct hlist_node uidhash_node; kuid_t uid; #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ defined(CONFIG_NET) || defined(CONFIG_IO_URING) atomic_long_t locked_vm; #endif #ifdef CONFIG_WATCH_QUEUE atomic_t nr_watches; /* The number of watches this user currently has */ #endif /* Miscellaneous per-user rate limit */ struct ratelimit_state ratelimit; }; extern int uids_sysfs_init(void); extern struct user_struct *find_user(kuid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) /* per-UID process charging. */ extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) { refcount_inc(&u->__count); return u; } extern void free_uid(struct user_struct *); #endif /* _LINUX_SCHED_USER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 // SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-fallback.sh // DO NOT MODIFY THIS FILE DIRECTLY #ifndef _LINUX_ATOMIC_FALLBACK_H #define _LINUX_ATOMIC_FALLBACK_H #include <linux/compiler.h> #ifndef arch_xchg_relaxed #define arch_xchg_relaxed arch_xchg #define arch_xchg_acquire arch_xchg #define arch_xchg_release arch_xchg #else /* arch_xchg_relaxed */ #ifndef arch_xchg_acquire #define arch_xchg_acquire(...) \ __atomic_op_acquire(arch_xchg, __VA_ARGS__) #endif #ifndef arch_xchg_release #define arch_xchg_release(...) \ __atomic_op_release(arch_xchg, __VA_ARGS__) #endif #ifndef arch_xchg #define arch_xchg(...) \ __atomic_op_fence(arch_xchg, __VA_ARGS__) #endif #endif /* arch_xchg_relaxed */ #ifndef arch_cmpxchg_relaxed #define arch_cmpxchg_relaxed arch_cmpxchg #define arch_cmpxchg_acquire arch_cmpxchg #define arch_cmpxchg_release arch_cmpxchg #else /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg_acquire #define arch_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_cmpxchg, __VA_ARGS__) #endif #ifndef arch_cmpxchg_release #define arch_cmpxchg_release(...) \ __atomic_op_release(arch_cmpxchg, __VA_ARGS__) #endif #ifndef arch_cmpxchg #define arch_cmpxchg(...) \ __atomic_op_fence(arch_cmpxchg, __VA_ARGS__) #endif #endif /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg64_relaxed #define arch_cmpxchg64_relaxed arch_cmpxchg64 #define arch_cmpxchg64_acquire arch_cmpxchg64 #define arch_cmpxchg64_release arch_cmpxchg64 #else /* arch_cmpxchg64_relaxed */ #ifndef arch_cmpxchg64_acquire #define arch_cmpxchg64_acquire(...) \ __atomic_op_acquire(arch_cmpxchg64, __VA_ARGS__) #endif #ifndef arch_cmpxchg64_release #define arch_cmpxchg64_release(...) \ __atomic_op_release(arch_cmpxchg64, __VA_ARGS__) #endif #ifndef arch_cmpxchg64 #define arch_cmpxchg64(...) \ __atomic_op_fence(arch_cmpxchg64, __VA_ARGS__) #endif #endif /* arch_cmpxchg64_relaxed */ #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { return smp_load_acquire(&(v)->counter); } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif #ifndef arch_atomic_set_release static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { smp_store_release(&(v)->counter, i); } #define arch_atomic_set_release arch_atomic_set_release #endif #ifndef arch_atomic_add_return_relaxed #define arch_atomic_add_return_acquire arch_atomic_add_return #define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return #else /* arch_atomic_add_return_relaxed */ #ifndef arch_atomic_add_return_acquire static __always_inline int arch_atomic_add_return_acquire(int i, atomic_t *v) { int ret = arch_atomic_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_add_return_acquire arch_atomic_add_return_acquire #endif #ifndef arch_atomic_add_return_release static __always_inline int arch_atomic_add_return_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_add_return_relaxed(i, v); } #define arch_atomic_add_return_release arch_atomic_add_return_release #endif #ifndef arch_atomic_add_return static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_add_return arch_atomic_add_return #endif #endif /* arch_atomic_add_return_relaxed */ #ifndef arch_atomic_fetch_add_relaxed #define arch_atomic_fetch_add_acquire arch_atomic_fetch_add #define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add #else /* arch_atomic_fetch_add_relaxed */ #ifndef arch_atomic_fetch_add_acquire static __always_inline int arch_atomic_fetch_add_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire #endif #ifndef arch_atomic_fetch_add_release static __always_inline int arch_atomic_fetch_add_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_add_relaxed(i, v); } #define arch_atomic_fetch_add_release arch_atomic_fetch_add_release #endif #ifndef arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_add arch_atomic_fetch_add #endif #endif /* arch_atomic_fetch_add_relaxed */ #ifndef arch_atomic_sub_return_relaxed #define arch_atomic_sub_return_acquire arch_atomic_sub_return #define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return #else /* arch_atomic_sub_return_relaxed */ #ifndef arch_atomic_sub_return_acquire static __always_inline int arch_atomic_sub_return_acquire(int i, atomic_t *v) { int ret = arch_atomic_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire #endif #ifndef arch_atomic_sub_return_release static __always_inline int arch_atomic_sub_return_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_sub_return_relaxed(i, v); } #define arch_atomic_sub_return_release arch_atomic_sub_return_release #endif #ifndef arch_atomic_sub_return static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_sub_return arch_atomic_sub_return #endif #endif /* arch_atomic_sub_return_relaxed */ #ifndef arch_atomic_fetch_sub_relaxed #define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub #define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub #else /* arch_atomic_fetch_sub_relaxed */ #ifndef arch_atomic_fetch_sub_acquire static __always_inline int arch_atomic_fetch_sub_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire #endif #ifndef arch_atomic_fetch_sub_release static __always_inline int arch_atomic_fetch_sub_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_sub_relaxed(i, v); } #define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release #endif #ifndef arch_atomic_fetch_sub static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_sub arch_atomic_fetch_sub #endif #endif /* arch_atomic_fetch_sub_relaxed */ #ifndef arch_atomic_inc static __always_inline void arch_atomic_inc(atomic_t *v) { arch_atomic_add(1, v); } #define arch_atomic_inc arch_atomic_inc #endif #ifndef arch_atomic_inc_return_relaxed #ifdef arch_atomic_inc_return #define arch_atomic_inc_return_acquire arch_atomic_inc_return #define arch_atomic_inc_return_release arch_atomic_inc_return #define arch_atomic_inc_return_relaxed arch_atomic_inc_return #endif /* arch_atomic_inc_return */ #ifndef arch_atomic_inc_return static __always_inline int arch_atomic_inc_return(atomic_t *v) { return arch_atomic_add_return(1, v); } #define arch_atomic_inc_return arch_atomic_inc_return #endif #ifndef arch_atomic_inc_return_acquire static __always_inline int arch_atomic_inc_return_acquire(atomic_t *v) { return arch_atomic_add_return_acquire(1, v); } #define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire #endif #ifndef arch_atomic_inc_return_release static __always_inline int arch_atomic_inc_return_release(atomic_t *v) { return arch_atomic_add_return_release(1, v); } #define arch_atomic_inc_return_release arch_atomic_inc_return_release #endif #ifndef arch_atomic_inc_return_relaxed static __always_inline int arch_atomic_inc_return_relaxed(atomic_t *v) { return arch_atomic_add_return_relaxed(1, v); } #define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed #endif #else /* arch_atomic_inc_return_relaxed */ #ifndef arch_atomic_inc_return_acquire static __always_inline int arch_atomic_inc_return_acquire(atomic_t *v) { int ret = arch_atomic_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire #endif #ifndef arch_atomic_inc_return_release static __always_inline int arch_atomic_inc_return_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_inc_return_relaxed(v); } #define arch_atomic_inc_return_release arch_atomic_inc_return_release #endif #ifndef arch_atomic_inc_return static __always_inline int arch_atomic_inc_return(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_inc_return arch_atomic_inc_return #endif #endif /* arch_atomic_inc_return_relaxed */ #ifndef arch_atomic_fetch_inc_relaxed #ifdef arch_atomic_fetch_inc #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc #define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc #endif /* arch_atomic_fetch_inc */ #ifndef arch_atomic_fetch_inc static __always_inline int arch_atomic_fetch_inc(atomic_t *v) { return arch_atomic_fetch_add(1, v); } #define arch_atomic_fetch_inc arch_atomic_fetch_inc #endif #ifndef arch_atomic_fetch_inc_acquire static __always_inline int arch_atomic_fetch_inc_acquire(atomic_t *v) { return arch_atomic_fetch_add_acquire(1, v); } #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire #endif #ifndef arch_atomic_fetch_inc_release static __always_inline int arch_atomic_fetch_inc_release(atomic_t *v) { return arch_atomic_fetch_add_release(1, v); } #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release #endif #ifndef arch_atomic_fetch_inc_relaxed static __always_inline int arch_atomic_fetch_inc_relaxed(atomic_t *v) { return arch_atomic_fetch_add_relaxed(1, v); } #define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc_relaxed #endif #else /* arch_atomic_fetch_inc_relaxed */ #ifndef arch_atomic_fetch_inc_acquire static __always_inline int arch_atomic_fetch_inc_acquire(atomic_t *v) { int ret = arch_atomic_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire #endif #ifndef arch_atomic_fetch_inc_release static __always_inline int arch_atomic_fetch_inc_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_inc_relaxed(v); } #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release #endif #ifndef arch_atomic_fetch_inc static __always_inline int arch_atomic_fetch_inc(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_inc arch_atomic_fetch_inc #endif #endif /* arch_atomic_fetch_inc_relaxed */ #ifndef arch_atomic_dec static __always_inline void arch_atomic_dec(atomic_t *v) { arch_atomic_sub(1, v); } #define arch_atomic_dec arch_atomic_dec #endif #ifndef arch_atomic_dec_return_relaxed #ifdef arch_atomic_dec_return #define arch_atomic_dec_return_acquire arch_atomic_dec_return #define arch_atomic_dec_return_release arch_atomic_dec_return #define arch_atomic_dec_return_relaxed arch_atomic_dec_return #endif /* arch_atomic_dec_return */ #ifndef arch_atomic_dec_return static __always_inline int arch_atomic_dec_return(atomic_t *v) { return arch_atomic_sub_return(1, v); } #define arch_atomic_dec_return arch_atomic_dec_return #endif #ifndef arch_atomic_dec_return_acquire static __always_inline int arch_atomic_dec_return_acquire(atomic_t *v) { return arch_atomic_sub_return_acquire(1, v); } #define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire #endif #ifndef arch_atomic_dec_return_release static __always_inline int arch_atomic_dec_return_release(atomic_t *v) { return arch_atomic_sub_return_release(1, v); } #define arch_atomic_dec_return_release arch_atomic_dec_return_release #endif #ifndef arch_atomic_dec_return_relaxed static __always_inline int arch_atomic_dec_return_relaxed(atomic_t *v) { return arch_atomic_sub_return_relaxed(1, v); } #define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed #endif #else /* arch_atomic_dec_return_relaxed */ #ifndef arch_atomic_dec_return_acquire static __always_inline int arch_atomic_dec_return_acquire(atomic_t *v) { int ret = arch_atomic_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire #endif #ifndef arch_atomic_dec_return_release static __always_inline int arch_atomic_dec_return_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_dec_return_relaxed(v); } #define arch_atomic_dec_return_release arch_atomic_dec_return_release #endif #ifndef arch_atomic_dec_return static __always_inline int arch_atomic_dec_return(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_dec_return arch_atomic_dec_return #endif #endif /* arch_atomic_dec_return_relaxed */ #ifndef arch_atomic_fetch_dec_relaxed #ifdef arch_atomic_fetch_dec #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec #define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec #endif /* arch_atomic_fetch_dec */ #ifndef arch_atomic_fetch_dec static __always_inline int arch_atomic_fetch_dec(atomic_t *v) { return arch_atomic_fetch_sub(1, v); } #define arch_atomic_fetch_dec arch_atomic_fetch_dec #endif #ifndef arch_atomic_fetch_dec_acquire static __always_inline int arch_atomic_fetch_dec_acquire(atomic_t *v) { return arch_atomic_fetch_sub_acquire(1, v); } #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire #endif #ifndef arch_atomic_fetch_dec_release static __always_inline int arch_atomic_fetch_dec_release(atomic_t *v) { return arch_atomic_fetch_sub_release(1, v); } #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release #endif #ifndef arch_atomic_fetch_dec_relaxed static __always_inline int arch_atomic_fetch_dec_relaxed(atomic_t *v) { return arch_atomic_fetch_sub_relaxed(1, v); } #define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec_relaxed #endif #else /* arch_atomic_fetch_dec_relaxed */ #ifndef arch_atomic_fetch_dec_acquire static __always_inline int arch_atomic_fetch_dec_acquire(atomic_t *v) { int ret = arch_atomic_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire #endif #ifndef arch_atomic_fetch_dec_release static __always_inline int arch_atomic_fetch_dec_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_dec_relaxed(v); } #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release #endif #ifndef arch_atomic_fetch_dec static __always_inline int arch_atomic_fetch_dec(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_dec arch_atomic_fetch_dec #endif #endif /* arch_atomic_fetch_dec_relaxed */ #ifndef arch_atomic_fetch_and_relaxed #define arch_atomic_fetch_and_acquire arch_atomic_fetch_and #define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and #else /* arch_atomic_fetch_and_relaxed */ #ifndef arch_atomic_fetch_and_acquire static __always_inline int arch_atomic_fetch_and_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire #endif #ifndef arch_atomic_fetch_and_release static __always_inline int arch_atomic_fetch_and_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_and_relaxed(i, v); } #define arch_atomic_fetch_and_release arch_atomic_fetch_and_release #endif #ifndef arch_atomic_fetch_and static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_and arch_atomic_fetch_and #endif #endif /* arch_atomic_fetch_and_relaxed */ #ifndef arch_atomic_andnot static __always_inline void arch_atomic_andnot(int i, atomic_t *v) { arch_atomic_and(~i, v); } #define arch_atomic_andnot arch_atomic_andnot #endif #ifndef arch_atomic_fetch_andnot_relaxed #ifdef arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot #endif /* arch_atomic_fetch_andnot */ #ifndef arch_atomic_fetch_andnot static __always_inline int arch_atomic_fetch_andnot(int i, atomic_t *v) { return arch_atomic_fetch_and(~i, v); } #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #endif #ifndef arch_atomic_fetch_andnot_acquire static __always_inline int arch_atomic_fetch_andnot_acquire(int i, atomic_t *v) { return arch_atomic_fetch_and_acquire(~i, v); } #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire #endif #ifndef arch_atomic_fetch_andnot_release static __always_inline int arch_atomic_fetch_andnot_release(int i, atomic_t *v) { return arch_atomic_fetch_and_release(~i, v); } #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release #endif #ifndef arch_atomic_fetch_andnot_relaxed static __always_inline int arch_atomic_fetch_andnot_relaxed(int i, atomic_t *v) { return arch_atomic_fetch_and_relaxed(~i, v); } #define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed #endif #else /* arch_atomic_fetch_andnot_relaxed */ #ifndef arch_atomic_fetch_andnot_acquire static __always_inline int arch_atomic_fetch_andnot_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire #endif #ifndef arch_atomic_fetch_andnot_release static __always_inline int arch_atomic_fetch_andnot_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_andnot_relaxed(i, v); } #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release #endif #ifndef arch_atomic_fetch_andnot static __always_inline int arch_atomic_fetch_andnot(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #endif #endif /* arch_atomic_fetch_andnot_relaxed */ #ifndef arch_atomic_fetch_or_relaxed #define arch_atomic_fetch_or_acquire arch_atomic_fetch_or #define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or #else /* arch_atomic_fetch_or_relaxed */ #ifndef arch_atomic_fetch_or_acquire static __always_inline int arch_atomic_fetch_or_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire #endif #ifndef arch_atomic_fetch_or_release static __always_inline int arch_atomic_fetch_or_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_or_relaxed(i, v); } #define arch_atomic_fetch_or_release arch_atomic_fetch_or_release #endif #ifndef arch_atomic_fetch_or static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_or arch_atomic_fetch_or #endif #endif /* arch_atomic_fetch_or_relaxed */ #ifndef arch_atomic_fetch_xor_relaxed #define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor #else /* arch_atomic_fetch_xor_relaxed */ #ifndef arch_atomic_fetch_xor_acquire static __always_inline int arch_atomic_fetch_xor_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire #endif #ifndef arch_atomic_fetch_xor_release static __always_inline int arch_atomic_fetch_xor_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_xor_relaxed(i, v); } #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release #endif #ifndef arch_atomic_fetch_xor static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #endif #endif /* arch_atomic_fetch_xor_relaxed */ #ifndef arch_atomic_xchg_relaxed #define arch_atomic_xchg_acquire arch_atomic_xchg #define arch_atomic_xchg_release arch_atomic_xchg #define arch_atomic_xchg_relaxed arch_atomic_xchg #else /* arch_atomic_xchg_relaxed */ #ifndef arch_atomic_xchg_acquire static __always_inline int arch_atomic_xchg_acquire(atomic_t *v, int i) { int ret = arch_atomic_xchg_relaxed(v, i); __atomic_acquire_fence(); return ret; } #define arch_atomic_xchg_acquire arch_atomic_xchg_acquire #endif #ifndef arch_atomic_xchg_release static __always_inline int arch_atomic_xchg_release(atomic_t *v, int i) { __atomic_release_fence(); return arch_atomic_xchg_relaxed(v, i); } #define arch_atomic_xchg_release arch_atomic_xchg_release #endif #ifndef arch_atomic_xchg static __always_inline int arch_atomic_xchg(atomic_t *v, int i) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_xchg_relaxed(v, i); __atomic_post_full_fence(); return ret; } #define arch_atomic_xchg arch_atomic_xchg #endif #endif /* arch_atomic_xchg_relaxed */ #ifndef arch_atomic_cmpxchg_relaxed #define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg #define arch_atomic_cmpxchg_release arch_atomic_cmpxchg #define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg #else /* arch_atomic_cmpxchg_relaxed */ #ifndef arch_atomic_cmpxchg_acquire static __always_inline int arch_atomic_cmpxchg_acquire(atomic_t *v, int old, int new) { int ret = arch_atomic_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire #endif #ifndef arch_atomic_cmpxchg_release static __always_inline int arch_atomic_cmpxchg_release(atomic_t *v, int old, int new) { __atomic_release_fence(); return arch_atomic_cmpxchg_relaxed(v, old, new); } #define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release #endif #ifndef arch_atomic_cmpxchg static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic_cmpxchg arch_atomic_cmpxchg #endif #endif /* arch_atomic_cmpxchg_relaxed */ #ifndef arch_atomic_try_cmpxchg_relaxed #ifdef arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg #endif /* arch_atomic_try_cmpxchg */ #ifndef arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg #endif #ifndef arch_atomic_try_cmpxchg_acquire static __always_inline bool arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire #endif #ifndef arch_atomic_try_cmpxchg_release static __always_inline bool arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release #endif #ifndef arch_atomic_try_cmpxchg_relaxed static __always_inline bool arch_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg_relaxed #endif #else /* arch_atomic_try_cmpxchg_relaxed */ #ifndef arch_atomic_try_cmpxchg_acquire static __always_inline bool arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { bool ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire #endif #ifndef arch_atomic_try_cmpxchg_release static __always_inline bool arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { __atomic_release_fence(); return arch_atomic_try_cmpxchg_relaxed(v, old, new); } #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release #endif #ifndef arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { bool ret; __atomic_pre_full_fence(); ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg #endif #endif /* arch_atomic_try_cmpxchg_relaxed */ #ifndef arch_atomic_sub_and_test /** * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return arch_atomic_sub_return(i, v) == 0; } #define arch_atomic_sub_and_test arch_atomic_sub_and_test #endif #ifndef arch_atomic_dec_and_test /** * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return arch_atomic_dec_return(v) == 0; } #define arch_atomic_dec_and_test arch_atomic_dec_and_test #endif #ifndef arch_atomic_inc_and_test /** * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return arch_atomic_inc_return(v) == 0; } #define arch_atomic_inc_and_test arch_atomic_inc_and_test #endif #ifndef arch_atomic_add_negative /** * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return arch_atomic_add_return(i, v) < 0; } #define arch_atomic_add_negative arch_atomic_add_negative #endif #ifndef arch_atomic_fetch_add_unless /** * arch_atomic_fetch_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c = arch_atomic_read(v); do { if (unlikely(c == u)) break; } while (!arch_atomic_try_cmpxchg(v, &c, c + a)); return c; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless #endif #ifndef arch_atomic_add_unless /** * arch_atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ static __always_inline bool arch_atomic_add_unless(atomic_t *v, int a, int u) { return arch_atomic_fetch_add_unless(v, a, u) != u; } #define arch_atomic_add_unless arch_atomic_add_unless #endif #ifndef arch_atomic_inc_not_zero /** * arch_atomic_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic_t * * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ static __always_inline bool arch_atomic_inc_not_zero(atomic_t *v) { return arch_atomic_add_unless(v, 1, 0); } #define arch_atomic_inc_not_zero arch_atomic_inc_not_zero #endif #ifndef arch_atomic_inc_unless_negative static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int c = arch_atomic_read(v); do { if (unlikely(c < 0)) return false; } while (!arch_atomic_try_cmpxchg(v, &c, c + 1)); return true; } #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative #endif #ifndef arch_atomic_dec_unless_positive static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) { int c = arch_atomic_read(v); do { if (unlikely(c > 0)) return false; } while (!arch_atomic_try_cmpxchg(v, &c, c - 1)); return true; } #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive #endif #ifndef arch_atomic_dec_if_positive static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) { int dec, c = arch_atomic_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!arch_atomic_try_cmpxchg(v, &c, dec)); return dec; } #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive #endif #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif #ifndef arch_atomic64_read_acquire static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { return smp_load_acquire(&(v)->counter); } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif #ifndef arch_atomic64_set_release static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { smp_store_release(&(v)->counter, i); } #define arch_atomic64_set_release arch_atomic64_set_release #endif #ifndef arch_atomic64_add_return_relaxed #define arch_atomic64_add_return_acquire arch_atomic64_add_return #define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return #else /* arch_atomic64_add_return_relaxed */ #ifndef arch_atomic64_add_return_acquire static __always_inline s64 arch_atomic64_add_return_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire #endif #ifndef arch_atomic64_add_return_release static __always_inline s64 arch_atomic64_add_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_add_return_relaxed(i, v); } #define arch_atomic64_add_return_release arch_atomic64_add_return_release #endif #ifndef arch_atomic64_add_return static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_add_return arch_atomic64_add_return #endif #endif /* arch_atomic64_add_return_relaxed */ #ifndef arch_atomic64_fetch_add_relaxed #define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add #define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add #else /* arch_atomic64_fetch_add_relaxed */ #ifndef arch_atomic64_fetch_add_acquire static __always_inline s64 arch_atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire #endif #ifndef arch_atomic64_fetch_add_release static __always_inline s64 arch_atomic64_fetch_add_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_add_relaxed(i, v); } #define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release #endif #ifndef arch_atomic64_fetch_add static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_add arch_atomic64_fetch_add #endif #endif /* arch_atomic64_fetch_add_relaxed */ #ifndef arch_atomic64_sub_return_relaxed #define arch_atomic64_sub_return_acquire arch_atomic64_sub_return #define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return #else /* arch_atomic64_sub_return_relaxed */ #ifndef arch_atomic64_sub_return_acquire static __always_inline s64 arch_atomic64_sub_return_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire #endif #ifndef arch_atomic64_sub_return_release static __always_inline s64 arch_atomic64_sub_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_sub_return_relaxed(i, v); } #define arch_atomic64_sub_return_release arch_atomic64_sub_return_release #endif #ifndef arch_atomic64_sub_return static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_sub_return arch_atomic64_sub_return #endif #endif /* arch_atomic64_sub_return_relaxed */ #ifndef arch_atomic64_fetch_sub_relaxed #define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub #else /* arch_atomic64_fetch_sub_relaxed */ #ifndef arch_atomic64_fetch_sub_acquire static __always_inline s64 arch_atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire #endif #ifndef arch_atomic64_fetch_sub_release static __always_inline s64 arch_atomic64_fetch_sub_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_sub_relaxed(i, v); } #define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release #endif #ifndef arch_atomic64_fetch_sub static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub #endif #endif /* arch_atomic64_fetch_sub_relaxed */ #ifndef arch_atomic64_inc static __always_inline void arch_atomic64_inc(atomic64_t *v) { arch_atomic64_add(1, v); } #define arch_atomic64_inc arch_atomic64_inc #endif #ifndef arch_atomic64_inc_return_relaxed #ifdef arch_atomic64_inc_return #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return #define arch_atomic64_inc_return_release arch_atomic64_inc_return #define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return #endif /* arch_atomic64_inc_return */ #ifndef arch_atomic64_inc_return static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { return arch_atomic64_add_return(1, v); } #define arch_atomic64_inc_return arch_atomic64_inc_return #endif #ifndef arch_atomic64_inc_return_acquire static __always_inline s64 arch_atomic64_inc_return_acquire(atomic64_t *v) { return arch_atomic64_add_return_acquire(1, v); } #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire #endif #ifndef arch_atomic64_inc_return_release static __always_inline s64 arch_atomic64_inc_return_release(atomic64_t *v) { return arch_atomic64_add_return_release(1, v); } #define arch_atomic64_inc_return_release arch_atomic64_inc_return_release #endif #ifndef arch_atomic64_inc_return_relaxed static __always_inline s64 arch_atomic64_inc_return_relaxed(atomic64_t *v) { return arch_atomic64_add_return_relaxed(1, v); } #define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed #endif #else /* arch_atomic64_inc_return_relaxed */ #ifndef arch_atomic64_inc_return_acquire static __always_inline s64 arch_atomic64_inc_return_acquire(atomic64_t *v) { s64 ret = arch_atomic64_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire #endif #ifndef arch_atomic64_inc_return_release static __always_inline s64 arch_atomic64_inc_return_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_inc_return_relaxed(v); } #define arch_atomic64_inc_return_release arch_atomic64_inc_return_release #endif #ifndef arch_atomic64_inc_return static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_inc_return arch_atomic64_inc_return #endif #endif /* arch_atomic64_inc_return_relaxed */ #ifndef arch_atomic64_fetch_inc_relaxed #ifdef arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc #endif /* arch_atomic64_fetch_inc */ #ifndef arch_atomic64_fetch_inc static __always_inline s64 arch_atomic64_fetch_inc(atomic64_t *v) { return arch_atomic64_fetch_add(1, v); } #define arch_atomic64_fetch_inc arch_atomic64_fetch_inc #endif #ifndef arch_atomic64_fetch_inc_acquire static __always_inline s64 arch_atomic64_fetch_inc_acquire(atomic64_t *v) { return arch_atomic64_fetch_add_acquire(1, v); } #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire #endif #ifndef arch_atomic64_fetch_inc_release static __always_inline s64 arch_atomic64_fetch_inc_release(atomic64_t *v) { return arch_atomic64_fetch_add_release(1, v); } #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release #endif #ifndef arch_atomic64_fetch_inc_relaxed static __always_inline s64 arch_atomic64_fetch_inc_relaxed(atomic64_t *v) { return arch_atomic64_fetch_add_relaxed(1, v); } #define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc_relaxed #endif #else /* arch_atomic64_fetch_inc_relaxed */ #ifndef arch_atomic64_fetch_inc_acquire static __always_inline s64 arch_atomic64_fetch_inc_acquire(atomic64_t *v) { s64 ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire #endif #ifndef arch_atomic64_fetch_inc_release static __always_inline s64 arch_atomic64_fetch_inc_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_inc_relaxed(v); } #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release #endif #ifndef arch_atomic64_fetch_inc static __always_inline s64 arch_atomic64_fetch_inc(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_inc arch_atomic64_fetch_inc #endif #endif /* arch_atomic64_fetch_inc_relaxed */ #ifndef arch_atomic64_dec static __always_inline void arch_atomic64_dec(atomic64_t *v) { arch_atomic64_sub(1, v); } #define arch_atomic64_dec arch_atomic64_dec #endif #ifndef arch_atomic64_dec_return_relaxed #ifdef arch_atomic64_dec_return #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return #define arch_atomic64_dec_return_release arch_atomic64_dec_return #define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return #endif /* arch_atomic64_dec_return */ #ifndef arch_atomic64_dec_return static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { return arch_atomic64_sub_return(1, v); } #define arch_atomic64_dec_return arch_atomic64_dec_return #endif #ifndef arch_atomic64_dec_return_acquire static __always_inline s64 arch_atomic64_dec_return_acquire(atomic64_t *v) { return arch_atomic64_sub_return_acquire(1, v); } #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire #endif #ifndef arch_atomic64_dec_return_release static __always_inline s64 arch_atomic64_dec_return_release(atomic64_t *v) { return arch_atomic64_sub_return_release(1, v); } #define arch_atomic64_dec_return_release arch_atomic64_dec_return_release #endif #ifndef arch_atomic64_dec_return_relaxed static __always_inline s64 arch_atomic64_dec_return_relaxed(atomic64_t *v) { return arch_atomic64_sub_return_relaxed(1, v); } #define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed #endif #else /* arch_atomic64_dec_return_relaxed */ #ifndef arch_atomic64_dec_return_acquire static __always_inline s64 arch_atomic64_dec_return_acquire(atomic64_t *v) { s64 ret = arch_atomic64_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire #endif #ifndef arch_atomic64_dec_return_release static __always_inline s64 arch_atomic64_dec_return_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_dec_return_relaxed(v); } #define arch_atomic64_dec_return_release arch_atomic64_dec_return_release #endif #ifndef arch_atomic64_dec_return static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_dec_return arch_atomic64_dec_return #endif #endif /* arch_atomic64_dec_return_relaxed */ #ifndef arch_atomic64_fetch_dec_relaxed #ifdef arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec #endif /* arch_atomic64_fetch_dec */ #ifndef arch_atomic64_fetch_dec static __always_inline s64 arch_atomic64_fetch_dec(atomic64_t *v) { return arch_atomic64_fetch_sub(1, v); } #define arch_atomic64_fetch_dec arch_atomic64_fetch_dec #endif #ifndef arch_atomic64_fetch_dec_acquire static __always_inline s64 arch_atomic64_fetch_dec_acquire(atomic64_t *v) { return arch_atomic64_fetch_sub_acquire(1, v); } #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire #endif #ifndef arch_atomic64_fetch_dec_release static __always_inline s64 arch_atomic64_fetch_dec_release(atomic64_t *v) { return arch_atomic64_fetch_sub_release(1, v); } #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release #endif #ifndef arch_atomic64_fetch_dec_relaxed static __always_inline s64 arch_atomic64_fetch_dec_relaxed(atomic64_t *v) { return arch_atomic64_fetch_sub_relaxed(1, v); } #define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec_relaxed #endif #else /* arch_atomic64_fetch_dec_relaxed */ #ifndef arch_atomic64_fetch_dec_acquire static __always_inline s64 arch_atomic64_fetch_dec_acquire(atomic64_t *v) { s64 ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire #endif #ifndef arch_atomic64_fetch_dec_release static __always_inline s64 arch_atomic64_fetch_dec_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_dec_relaxed(v); } #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release #endif #ifndef arch_atomic64_fetch_dec static __always_inline s64 arch_atomic64_fetch_dec(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_dec arch_atomic64_fetch_dec #endif #endif /* arch_atomic64_fetch_dec_relaxed */ #ifndef arch_atomic64_fetch_and_relaxed #define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and #define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and #else /* arch_atomic64_fetch_and_relaxed */ #ifndef arch_atomic64_fetch_and_acquire static __always_inline s64 arch_atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire #endif #ifndef arch_atomic64_fetch_and_release static __always_inline s64 arch_atomic64_fetch_and_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_and_relaxed(i, v); } #define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release #endif #ifndef arch_atomic64_fetch_and static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_and arch_atomic64_fetch_and #endif #endif /* arch_atomic64_fetch_and_relaxed */ #ifndef arch_atomic64_andnot static __always_inline void arch_atomic64_andnot(s64 i, atomic64_t *v) { arch_atomic64_and(~i, v); } #define arch_atomic64_andnot arch_atomic64_andnot #endif #ifndef arch_atomic64_fetch_andnot_relaxed #ifdef arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot #endif /* arch_atomic64_fetch_andnot */ #ifndef arch_atomic64_fetch_andnot static __always_inline s64 arch_atomic64_fetch_andnot(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and(~i, v); } #define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #endif #ifndef arch_atomic64_fetch_andnot_acquire static __always_inline s64 arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_acquire(~i, v); } #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire #endif #ifndef arch_atomic64_fetch_andnot_release static __always_inline s64 arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_release(~i, v); } #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release #endif #ifndef arch_atomic64_fetch_andnot_relaxed static __always_inline s64 arch_atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_relaxed(~i, v); } #define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed #endif #else /* arch_atomic64_fetch_andnot_relaxed */ #ifndef arch_atomic64_fetch_andnot_acquire static __always_inline s64 arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire #endif #ifndef arch_atomic64_fetch_andnot_release static __always_inline s64 arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_andnot_relaxed(i, v); } #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release #endif #ifndef arch_atomic64_fetch_andnot static __always_inline s64 arch_atomic64_fetch_andnot(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #endif #endif /* arch_atomic64_fetch_andnot_relaxed */ #ifndef arch_atomic64_fetch_or_relaxed #define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or #define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or #else /* arch_atomic64_fetch_or_relaxed */ #ifndef arch_atomic64_fetch_or_acquire static __always_inline s64 arch_atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire #endif #ifndef arch_atomic64_fetch_or_release static __always_inline s64 arch_atomic64_fetch_or_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_or_relaxed(i, v); } #define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release #endif #ifndef arch_atomic64_fetch_or static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_or arch_atomic64_fetch_or #endif #endif /* arch_atomic64_fetch_or_relaxed */ #ifndef arch_atomic64_fetch_xor_relaxed #define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor #else /* arch_atomic64_fetch_xor_relaxed */ #ifndef arch_atomic64_fetch_xor_acquire static __always_inline s64 arch_atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire #endif #ifndef arch_atomic64_fetch_xor_release static __always_inline s64 arch_atomic64_fetch_xor_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_xor_relaxed(i, v); } #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release #endif #ifndef arch_atomic64_fetch_xor static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif #endif /* arch_atomic64_fetch_xor_relaxed */ #ifndef arch_atomic64_xchg_relaxed #define arch_atomic64_xchg_acquire arch_atomic64_xchg #define arch_atomic64_xchg_release arch_atomic64_xchg #define arch_atomic64_xchg_relaxed arch_atomic64_xchg #else /* arch_atomic64_xchg_relaxed */ #ifndef arch_atomic64_xchg_acquire static __always_inline s64 arch_atomic64_xchg_acquire(atomic64_t *v, s64 i) { s64 ret = arch_atomic64_xchg_relaxed(v, i); __atomic_acquire_fence(); return ret; } #define arch_atomic64_xchg_acquire arch_atomic64_xchg_acquire #endif #ifndef arch_atomic64_xchg_release static __always_inline s64 arch_atomic64_xchg_release(atomic64_t *v, s64 i) { __atomic_release_fence(); return arch_atomic64_xchg_relaxed(v, i); } #define arch_atomic64_xchg_release arch_atomic64_xchg_release #endif #ifndef arch_atomic64_xchg static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 i) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_xchg_relaxed(v, i); __atomic_post_full_fence(); return ret; } #define arch_atomic64_xchg arch_atomic64_xchg #endif #endif /* arch_atomic64_xchg_relaxed */ #ifndef arch_atomic64_cmpxchg_relaxed #define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg #define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg #define arch_atomic64_cmpxchg_relaxed arch_atomic64_cmpxchg #else /* arch_atomic64_cmpxchg_relaxed */ #ifndef arch_atomic64_cmpxchg_acquire static __always_inline s64 arch_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { s64 ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg_acquire #endif #ifndef arch_atomic64_cmpxchg_release static __always_inline s64 arch_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { __atomic_release_fence(); return arch_atomic64_cmpxchg_relaxed(v, old, new); } #define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg_release #endif #ifndef arch_atomic64_cmpxchg static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg #endif #endif /* arch_atomic64_cmpxchg_relaxed */ #ifndef arch_atomic64_try_cmpxchg_relaxed #ifdef arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg #endif /* arch_atomic64_try_cmpxchg */ #ifndef arch_atomic64_try_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg #endif #ifndef arch_atomic64_try_cmpxchg_acquire static __always_inline bool arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire #endif #ifndef arch_atomic64_try_cmpxchg_release static __always_inline bool arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release #endif #ifndef arch_atomic64_try_cmpxchg_relaxed static __always_inline bool arch_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg_relaxed #endif #else /* arch_atomic64_try_cmpxchg_relaxed */ #ifndef arch_atomic64_try_cmpxchg_acquire static __always_inline bool arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { bool ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire #endif #ifndef arch_atomic64_try_cmpxchg_release static __always_inline bool arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { __atomic_release_fence(); return arch_atomic64_try_cmpxchg_relaxed(v, old, new); } #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release #endif #ifndef arch_atomic64_try_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg #endif #endif /* arch_atomic64_try_cmpxchg_relaxed */ #ifndef arch_atomic64_sub_and_test /** * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic64_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return arch_atomic64_sub_return(i, v) == 0; } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test #endif #ifndef arch_atomic64_dec_and_test /** * arch_atomic64_dec_and_test - decrement and test * @v: pointer of type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return arch_atomic64_dec_return(v) == 0; } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test #endif #ifndef arch_atomic64_inc_and_test /** * arch_atomic64_inc_and_test - increment and test * @v: pointer of type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return arch_atomic64_inc_return(v) == 0; } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test #endif #ifndef arch_atomic64_add_negative /** * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic64_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return arch_atomic64_add_return(i, v) < 0; } #define arch_atomic64_add_negative arch_atomic64_add_negative #endif #ifndef arch_atomic64_fetch_add_unless /** * arch_atomic64_fetch_add_unless - add unless the number is already a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { s64 c = arch_atomic64_read(v); do { if (unlikely(c == u)) break; } while (!arch_atomic64_try_cmpxchg(v, &c, c + a)); return c; } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif #ifndef arch_atomic64_add_unless /** * arch_atomic64_add_unless - add unless the number is already a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ static __always_inline bool arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { return arch_atomic64_fetch_add_unless(v, a, u) != u; } #define arch_atomic64_add_unless arch_atomic64_add_unless #endif #ifndef arch_atomic64_inc_not_zero /** * arch_atomic64_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic64_t * * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ static __always_inline bool arch_atomic64_inc_not_zero(atomic64_t *v) { return arch_atomic64_add_unless(v, 1, 0); } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero #endif #ifndef arch_atomic64_inc_unless_negative static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) { s64 c = arch_atomic64_read(v); do { if (unlikely(c < 0)) return false; } while (!arch_atomic64_try_cmpxchg(v, &c, c + 1)); return true; } #define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative #endif #ifndef arch_atomic64_dec_unless_positive static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) { s64 c = arch_atomic64_read(v); do { if (unlikely(c > 0)) return false; } while (!arch_atomic64_try_cmpxchg(v, &c, c - 1)); return true; } #define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive #endif #ifndef arch_atomic64_dec_if_positive static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 dec, c = arch_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!arch_atomic64_try_cmpxchg(v, &c, dec)); return dec; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ // 90cd26cfd69d2250303d654955a0cc12620fb91b
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 // SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/xattr.h On-disk format of extended attributes for the ext4 filesystem. (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #include <linux/xattr.h> /* Magic value in attribute blocks */ #define EXT4_XATTR_MAGIC 0xEA020000 /* Maximum number of references to one attribute block */ #define EXT4_XATTR_REFCOUNT_MAX 1024 /* Name indexes */ #define EXT4_XATTR_INDEX_USER 1 #define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2 #define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3 #define EXT4_XATTR_INDEX_TRUSTED 4 #define EXT4_XATTR_INDEX_LUSTRE 5 #define EXT4_XATTR_INDEX_SECURITY 6 #define EXT4_XATTR_INDEX_SYSTEM 7 #define EXT4_XATTR_INDEX_RICHACL 8 #define EXT4_XATTR_INDEX_ENCRYPTION 9 #define EXT4_XATTR_INDEX_HURD 10 /* Reserved for Hurd */ struct ext4_xattr_header { __le32 h_magic; /* magic number for identification */ __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ __le32 h_hash; /* hash value of all attributes */ __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ /* id = inum if refcount=1, blknum otherwise */ __u32 h_reserved[3]; /* zero right now */ }; struct ext4_xattr_ibody_header { __le32 h_magic; /* magic number for identification */ }; struct ext4_xattr_entry { __u8 e_name_len; /* length of name */ __u8 e_name_index; /* attribute name index */ __le16 e_value_offs; /* offset in disk block of value */ __le32 e_value_inum; /* inode in which the value is stored */ __le32 e_value_size; /* size of attribute value */ __le32 e_hash; /* hash value of name and value */ char e_name[]; /* attribute name */ }; #define EXT4_XATTR_PAD_BITS 2 #define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS) #define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1) #define EXT4_XATTR_LEN(name_len) \ (((name_len) + EXT4_XATTR_ROUND + \ sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) #define EXT4_XATTR_NEXT(entry) \ ((struct ext4_xattr_entry *)( \ (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) #define IHDR(inode, raw_inode) \ ((struct ext4_xattr_ibody_header *) \ ((void *)raw_inode + \ EXT4_GOOD_OLD_INODE_SIZE + \ EXT4_I(inode)->i_extra_isize)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) /* * XATTR_SIZE_MAX is currently 64k, but for the purposes of checking * for file system consistency errors, we use a somewhat bigger value. * This allows XATTR_SIZE_MAX to grow in the future, but by using this * instead of INT_MAX for certain consistency checks, we don't need to * worry about arithmetic overflows. (Actually XATTR_SIZE_MAX is * defined in include/uapi/linux/limits.h, so changing it is going * not going to be trivial....) */ #define EXT4_XATTR_SIZE_MAX (1 << 24) /* * The minimum size of EA value when you start storing it in an external inode * size of block - size of header - size of 1 entry - 4 null bytes */ #define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4) #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) #define EXT4_ZERO_XATTR_VALUE ((void *)-1) struct ext4_xattr_info { const char *name; const void *value; size_t value_len; int name_index; int in_inode; }; struct ext4_xattr_search { struct ext4_xattr_entry *first; void *base; void *end; struct ext4_xattr_entry *here; int not_found; }; struct ext4_xattr_ibody_find { struct ext4_xattr_search s; struct ext4_iloc iloc; }; struct ext4_xattr_inode_array { unsigned int count; /* # of used items in the array */ struct inode *inodes[]; }; extern const struct xattr_handler ext4_xattr_user_handler; extern const struct xattr_handler ext4_xattr_trusted_handler; extern const struct xattr_handler ext4_xattr_security_handler; extern const struct xattr_handler ext4_xattr_hurd_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" /* * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. * The first is to signal that there the inline xattrs and data are * taking up so much space that we might as well not keep trying to * expand it. The second is that xattr_sem is taken for writing, so * we shouldn't try to recurse into the inode expansion. For this * second case, we need to make sure that we take save and restore the * NO_EXPAND state flag appropriately. */ static inline void ext4_write_lock_xattr(struct inode *inode, int *save) { down_write(&EXT4_I(inode)->xattr_sem); *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); } static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) { if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) return 0; *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); return 1; } static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) { if (*save == 0) ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); } extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len, bool is_create, int *credits); extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, struct buffer_head *block_bh, size_t value_len, bool is_create); extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, struct ext4_xattr_inode_array **array, int extra_credits); extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle); extern const struct xattr_handler *ext4_xattr_handlers[]; extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size); extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); extern struct mb_cache *ext4_xattr_create_cache(void); extern void ext4_xattr_destroy_cache(struct mb_cache *); #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr); #else static inline int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr) { return 0; } #endif #ifdef CONFIG_LOCKDEP extern void ext4_xattr_inode_set_class(struct inode *ea_inode); #else static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { } #endif extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIME64_H #define _LINUX_TIME64_H #include <linux/math64.h> #include <vdso/time64.h> typedef __s64 time64_t; typedef __u64 timeu64_t; #include <uapi/linux/time.h> struct timespec64 { time64_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; struct itimerspec64 { struct timespec64 it_interval; struct timespec64 it_value; }; /* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) #define TIME64_MIN (-TIME64_MAX - 1) #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) /* * Limits for settimeofday(): * * To prevent setting the time close to the wraparound point time setting * is limited so a reasonable uptime can be accomodated. Uptime of 30 years * should be really sufficient, which means the cutoff is 2232. At that * point the cutoff is just a small part of the larger problem. */ #define TIME_UPTIME_SEC_MAX (30LL * 365 * 24 *3600) #define TIME_SETTOD_SEC_MAX (KTIME_SEC_MAX - TIME_UPTIME_SEC_MAX) static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } /* * lhs < rhs: return <0 * lhs == rhs: return 0 * lhs > rhs: return >0 */ static inline int timespec64_compare(const struct timespec64 *lhs, const struct timespec64 *rhs) { if (lhs->tv_sec < rhs->tv_sec) return -1; if (lhs->tv_sec > rhs->tv_sec) return 1; return lhs->tv_nsec - rhs->tv_nsec; } extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec); static inline struct timespec64 timespec64_add(struct timespec64 lhs, struct timespec64 rhs) { struct timespec64 ts_delta; set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec); return ts_delta; } /* * sub = lhs - rhs, in normalized form */ static inline struct timespec64 timespec64_sub(struct timespec64 lhs, struct timespec64 rhs) { struct timespec64 ts_delta; set_normalized_timespec64(&ts_delta, lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec); return ts_delta; } /* * Returns true if the timespec64 is norm, false if denorm: */ static inline bool timespec64_valid(const struct timespec64 *ts) { /* Dates before 1970 are bogus */ if (ts->tv_sec < 0) return false; /* Can't have more nanoseconds then a second */ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return false; return true; } static inline bool timespec64_valid_strict(const struct timespec64 *ts) { if (!timespec64_valid(ts)) return false; /* Disallow values that could overflow ktime_t */ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) return false; return true; } static inline bool timespec64_valid_settod(const struct timespec64 *ts) { if (!timespec64_valid(ts)) return false; /* Disallow values which cause overflow issues vs. CLOCK_REALTIME */ if ((unsigned long long)ts->tv_sec >= TIME_SETTOD_SEC_MAX) return false; return true; } /** * timespec64_to_ns - Convert timespec64 to nanoseconds * @ts: pointer to the timespec64 variable to be converted * * Returns the scalar nanosecond representation of the timespec64 * parameter. */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { /* Prevent multiplication overflow */ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) return KTIME_MAX; return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } /** * ns_to_timespec64 - Convert nanoseconds to timespec64 * @nsec: the nanoseconds value to be converted * * Returns the timespec64 representation of the nsec parameter. */ extern struct timespec64 ns_to_timespec64(const s64 nsec); /** * timespec64_add_ns - Adds nanoseconds to a timespec64 * @a: pointer to timespec64 to be incremented * @ns: unsigned nanoseconds value to be added * * This must always be inlined because its used from the x86-64 vdso, * which cannot call other kernel functions. */ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) { a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } /* * timespec64_add_safe assumes both values are positive and checks for * overflow. It will return TIME64_MAX in case of overflow. */ extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, const struct timespec64 rhs); #endif /* _LINUX_TIME64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/writeback.h */ #ifndef WRITEBACK_H #define WRITEBACK_H #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/fs.h> #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> #include <linux/blk-cgroup.h> struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* * The 1/4 region under the global dirty thresh is for smooth dirty throttling: * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) struct backing_dev_info; /* * fs/fs-writeback.c */ enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ }; /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised * in a manner such that unspecified fields are set to zero. */ struct writeback_control { long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ /* * For a_ops->writepages(): if start or end are non-zero then this is * a hint that the filesystem need only write out the pages inside that * byterange. The byte at `end' is included in the writeout request. */ loff_t range_start; loff_t range_end; enum writeback_sync_modes sync_mode; unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ /* * When writeback IOs are bounced through async layers, only the * initial synchronous phase should be accounted towards inode * cgroup ownership arbitration to avoid confusion. Later stages * can set the following flag to disable the accounting. */ unsigned no_cgroup_owner:1; unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ /* foreign inode detection, see wbc_detach_inode() */ int wb_id; /* current wb id */ int wb_lcand_id; /* last foreign candidate wb id */ int wb_tcand_id; /* this foreign candidate wb id */ size_t wb_bytes; /* bytes written by current wb */ size_t wb_lcand_bytes; /* bytes written by last candidate */ size_t wb_tcand_bytes; /* bytes written by this candidate */ #endif }; static inline int wbc_to_write_flags(struct writeback_control *wbc) { int flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; if (wbc->sync_mode == WB_SYNC_ALL) flags |= REQ_SYNC; else if (wbc->for_kupdate || wbc->for_background) flags |= REQ_BACKGROUND; return flags; } static inline struct cgroup_subsys_state * wbc_blkcg_css(struct writeback_control *wbc) { #ifdef CONFIG_CGROUP_WRITEBACK if (wbc->wb) return wbc->wb->blkcg_css; #endif return blkcg_root_css; } /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global * domain, global_wb_domain, that every wb in the system is a member of. * This allows measuring the relative bandwidth of each wb to distribute * dirtyable memory accordingly. */ struct wb_domain { spinlock_t lock; /* * Scale the writeback cache size proportional to the relative * writeout speed. * * We do this by keeping a floating proportion between BDIs, based * on page writeback completions [end_page_writeback()]. Those * devices that write out pages fastest will get the larger share, * while the slower will get a smaller share. * * We use page writeout completions because we are interested in * getting rid of dirty pages. Having them written out is the * primary goal. * * We introduce a concept of time, a period over which we measure * these events, because demand can/will vary over time. The length * of this period itself is measured in page writeback completions. */ struct fprop_global completions; struct timer_list period_timer; /* timer for aging of completions */ unsigned long period_time; /* * The dirtyable memory and dirty threshold could be suddenly * knocked down by a large amount (eg. on the startup of KVM in a * swapless system). This may throw the system into deep dirty * exceeded state and throttle heavy/light dirtiers alike. To * retain good responsiveness, maintain global_dirty_limit for * tracking slowly down to the knocked down dirty threshold. * * Both fields are protected by ->lock. */ unsigned long dirty_limit_tstamp; unsigned long dirty_limit; }; /** * wb_domain_size_changed - memory available to a wb_domain has changed * @dom: wb_domain of interest * * This function should be called when the amount of memory available to * @dom has changed. It resets @dom's dirty limit parameters to prevent * the past values which don't match the current configuration from skewing * dirty throttling. Without this, when memory size of a wb_domain is * greatly reduced, the dirty throttling logic may allow too many pages to * be dirtied leading to consecutive unnecessary OOMs and may get stuck in * that situation. */ static inline void wb_domain_size_changed(struct wb_domain *dom) { spin_lock(&dom->lock); dom->dirty_limit_tstamp = jiffies; dom->dirty_limit = 0; spin_unlock(&dom->lock); } /* * fs/fs-writeback.c */ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { might_sleep(); wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } #ifdef CONFIG_CGROUP_WRITEBACK #include <linux/cgroup.h> #include <linux/bio.h> void __inode_attach_wb(struct inode *inode, struct page *page); void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest * @page: page being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o * @inode->i_lock. */ static inline void inode_attach_wb(struct inode *inode, struct page *page) { if (!inode->i_wb) __inode_attach_wb(inode, page); } /** * inode_detach_wb - disassociate an inode from its wb * @inode: inode of interest * * @inode is being freed. Detach from its wb. */ static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } } /** * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite * @wbc: writeback_control of interest * @inode: target inode * * This function is to be used by __filemap_fdatawrite_range(), which is an * alternative entry point into writeback code, and first ensures @inode is * associated with a bdi_writeback and attaches it to @wbc. */ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { spin_lock(&inode->i_lock); inode_attach_wb(inode, NULL); wbc_attach_and_unlock_inode(wbc, inode); } /** * wbc_init_bio - writeback specific initializtion of bio * @wbc: writeback_control for the writeback in progress * @bio: bio to be initialized * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup * writeback context. Must be called after the bio has been associated with * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (wbc->wb) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct page *page) { } static inline void inode_detach_wb(struct inode *inode) { } static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock) { spin_unlock(&inode->i_lock); } static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { } static inline void wbc_detach_inode(struct writeback_control *wbc) { } static inline