1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 /* SPDX-License-Identifier: GPL-2.0 */ /* linux/include/linux/clockchips.h * * This file contains the structure definitions for clockchips. * * If you are not a clockchip, or the time of day code, you should * not be including this file! */ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H #ifdef CONFIG_GENERIC_CLOCKEVENTS # include <linux/clocksource.h> # include <linux/cpumask.h> # include <linux/ktime.h> # include <linux/notifier.h> struct clock_event_device; struct module; /* * Possible states of a clock event device. * * DETACHED: Device is not used by clockevents core. Initial state or can be * reached from SHUTDOWN. * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. * PERIODIC: Device is programmed to generate events periodically. Can be * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* * Clock event features */ # define CLOCK_EVT_FEAT_PERIODIC 0x000001 # define CLOCK_EVT_FEAT_ONESHOT 0x000002 # define CLOCK_EVT_FEAT_KTIME 0x000004 /* * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ # define CLOCK_EVT_FEAT_C3STOP 0x000008 # define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ # define CLOCK_EVT_FEAT_DYNIRQ 0x000020 # define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ # define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low * level handler of the event source * @set_next_event: set next event function using a clocksource delta * @set_next_ktime: set next event function using a direct ktime value * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot * @set_state_oneshot_stopped: switch state to oneshot_stopped * @set_state_shutdown: switch state to shutdown * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; unsigned long max_delta_ticks; const char *name; int rating; int irq; int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; } ____cacheline_aligned; /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: * * clock_ticks = (nanoseconds * factor) >> shift. * * div_sc is the rearranged equation to calculate a factor from a given clock * ticks / nanoseconds ratio: * * factor = (clock_ticks << shift) / nanoseconds */ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, int shift) { u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); return (unsigned long) tmp; } /* Clock event layer functions */ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST # ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); # else # define tick_broadcast NULL # endif extern int tick_receive_broadcast(void); # endif # if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); # else static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #endif /* _LINUX_CLOCKCHIPS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SEQ_FILE_H #define _LINUX_SEQ_FILE_H #include <linux/types.h> #include <linux/string.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/fs.h> #include <linux/cred.h> struct seq_operations; struct seq_file { char *buf; size_t size; size_t from; size_t count; size_t pad_until; loff_t index; loff_t read_pos; struct mutex lock; const struct seq_operations *op; int poll_event; const struct file *file; void *private; }; struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); }; #define SEQ_SKIP 1 /** * seq_has_overflowed - check if the buffer has overflowed * @m: the seq_file handle * * seq_files have a buffer which may overflow. When this happens a larger * buffer is reallocated and all the data will be printed again. * The overflow state is true when m->count == m->size. * * Returns true if the buffer received more than it can hold. */ static inline bool seq_has_overflowed(struct seq_file *m) { return m->count == m->size; } /** * seq_get_buf - get buffer to write arbitrary data to * @m: the seq_file handle * @bufp: the beginning of the buffer is stored here * * Return the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_get_buf(struct seq_file *m, char **bufp) { BUG_ON(m->count > m->size); if (m->count < m->size) *bufp = m->buf + m->count; else *bufp = NULL; return m->size - m->count; } /** * seq_commit - commit data to the buffer * @m: the seq_file handle * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired * by seq_buf_get. To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_commit(struct seq_file *m, int num) { if (num < 0) { m->count = m->size; } else { BUG_ON(m->count + num > m->size); m->count += num; } } /** * seq_setwidth - set padding width * @m: the seq_file handle * @size: the max number of bytes to pad. * * Call seq_setwidth() for setting max width, then call seq_printf() etc. and * finally call seq_pad() to pad the remaining bytes. */ static inline void seq_setwidth(struct seq_file *m, size_t size) { m->pad_until = m->count + size; } void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 0) void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *s); void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width); void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); int seq_path(struct seq_file *, const struct path *, const char *); int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #define DEFINE_SEQ_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ int ret = seq_open(file, &__name ## _sops); \ if (!ret && inode->i_private) { \ struct seq_file *seq_f = file->private_data; \ seq_f->private = inode->i_private; \ } \ return ret; \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = seq_release, \ } #define DEFINE_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = single_release, \ } #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, PDE_DATA(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ .proc_open = __name ## _open, \ .proc_read = seq_read, \ .proc_lseek = seq_lseek, \ .proc_release = single_release, \ } static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; #endif } /** * seq_show_options - display mount options with appropriate escapes. * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, can be NULL */ static inline void seq_show_option(struct seq_file *m, const char *name, const char *value) { seq_putc(m, ','); seq_escape(m, name, ",= \t\n\\"); if (value) { seq_putc(m, '='); seq_escape(m, value, ", \t\n\\"); } } /** * seq_show_option_n - display mount options with appropriate escapes * where @value must be a specific length. * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL * @length: the length of @value to display * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ strncpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files */ extern struct list_head *seq_list_start(struct list_head *head, loff_t pos); extern struct list_head *seq_list_start_head(struct list_head *head, loff_t pos); extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); /* * Helpers for iteration over hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head, loff_t *ppos); extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head, loff_t *ppos); /* Helpers for iterating over per-cpu hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos); extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos); void seq_file_init(void); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip_tunnels.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); skb_dst_drop(skb); dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * 25-Jul-1998 Major changes to allow for ip chain table * * 3-Jan-2000 Named tables to allow packet selection for different uses. */ /* * Format of an IP firewall descriptor * * src, dst, src_mask, dst_mask are always stored in network byte order. * flags are stored in host byte order (of course). * Port numbers are stored in HOST byte order. */ #ifndef _UAPI_IPTABLES_H #define _UAPI_IPTABLES_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/if.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter/x_tables.h> #ifndef __KERNEL__ #define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define ipt_match xt_match #define ipt_target xt_target #define ipt_table xt_table #define ipt_get_revision xt_get_revision #define ipt_entry_match xt_entry_match #define ipt_entry_target xt_entry_target #define ipt_standard_target xt_standard_target #define ipt_error_target xt_error_target #define ipt_counters xt_counters #define IPT_CONTINUE XT_CONTINUE #define IPT_RETURN XT_RETURN /* This group is older than old (iptables < v1.4.0-rc1~89) */ #include <linux/netfilter/xt_tcpudp.h> #define ipt_udp xt_udp #define ipt_tcp xt_tcp #define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT #define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT #define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS #define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION #define IPT_TCP_INV_MASK XT_TCP_INV_MASK #define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT #define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT #define IPT_UDP_INV_MASK XT_UDP_INV_MASK /* The argument to IPT_SO_ADD_COUNTERS. */ #define ipt_counters_info xt_counters_info /* Standard return verdict, or do jump. */ #define IPT_STANDARD_TARGET XT_STANDARD_TARGET /* Error verdict. */ #define IPT_ERROR_TARGET XT_ERROR_TARGET /* fn returns 0 to continue iteration */ #define IPT_MATCH_ITERATE(e, fn, args...) \ XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) /* fn returns 0 to continue iteration */ #define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) #endif /* Yes, Virginia, you have to zero the padding. */ struct ipt_ip { /* Source and destination IP addr */ struct in_addr src, dst; /* Mask for src and dest IP addr */ struct in_addr smsk, dmsk; char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; /* Protocol, 0 = ANY */ __u16 proto; /* Flags word */ __u8 flags; /* Inverse flags */ __u8 invflags; }; /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ #define IPT_F_MASK 0x03 /* All possible flag bits mask. */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ #define IPT_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ #define IPT_INV_TOS 0x04 /* Invert the sense of TOS. */ #define IPT_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ #define IPT_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ #define IPT_INV_FRAG 0x20 /* Invert the sense of FRAG. */ #define IPT_INV_PROTO XT_INV_PROTO #define IPT_INV_MASK 0x7F /* All possible flag bits mask. */ /* This structure defines each of the firewall rules. Consists of 3 parts which are 1) general IP header stuff 2) match specific stuff 3) the target to perform if the rule matches */ struct ipt_entry { struct ipt_ip ip; /* Mark with fields that we care about. */ unsigned int nfcache; /* Size of ipt_entry + matches */ __u16 target_offset; /* Size of ipt_entry + matches + target */ __u16 next_offset; /* Back pointer */ unsigned int comefrom; /* Packet and byte counters. */ struct xt_counters counters; /* The matches (if any), then the target. */ unsigned char elems[0]; }; /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use a raw * socket for this. Instead we check rights in the calls. * * ATTENTION: check linux/in.h before adding new number here. */ #define IPT_BASE_CTL 64 #define IPT_SO_SET_REPLACE (IPT_BASE_CTL) #define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) #define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS #define IPT_SO_GET_INFO (IPT_BASE_CTL) #define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) #define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) #define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) #define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET /* ICMP matching stuff */ struct ipt_icmp { __u8 type; /* type to match */ __u8 code[2]; /* range of code */ __u8 invflags; /* Inverse flags */ }; /* Values for "inv" field for struct ipt_icmp. */ #define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ /* The argument to IPT_SO_GET_INFO */ struct ipt_getinfo { /* Which table: caller fills this in. */ char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ unsigned int valid_hooks; /* Hook entry points: one per netfilter hook. */ unsigned int hook_entry[NF_INET_NUMHOOKS]; /* Underflow points. */ unsigned int underflow[NF_INET_NUMHOOKS]; /* Number of entries */ unsigned int num_entries; /* Size of entries. */ unsigned int size; }; /* The argument to IPT_SO_SET_REPLACE. */ struct ipt_replace { /* Which table. */ char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ unsigned int valid_hooks; /* Number of entries */ unsigned int num_entries; /* Total size of new entries */ unsigned int size; /* Hook entry points. */ unsigned int hook_entry[NF_INET_NUMHOOKS]; /* Underflow points. */ unsigned int underflow[NF_INET_NUMHOOKS]; /* Information about old entries: */ /* Number of counters (must be equal to current number of entries). */ unsigned int num_counters; /* The old entries' counters. */ struct xt_counters __user *counters; /* The entries (hang off end: not really an array). */ struct ipt_entry entries[0]; }; /* The argument to IPT_SO_GET_ENTRIES. */ struct ipt_get_entries { /* Which table: user fills this in. */ char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; /* The entries. */ struct ipt_entry entrytable[0]; }; /* Helper functions */ static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) { return (struct xt_entry_target *)((char *)e + e->target_offset); } /* * Main firewall chains definitions and global var's definitions. */ #endif /* _UAPI_IPTABLES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KERNEL_EVENTS_INTERNAL_H #define _KERNEL_EVENTS_INTERNAL_H #include <linux/hardirq.h> #include <linux/uaccess.h> #include <linux/refcount.h> /* Buffer handling */ #define RING_BUFFER_WRITABLE 0x01 struct perf_buffer { refcount_t refcount; struct rcu_head rcu_head; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ #endif int nr_pages; /* nr of data pages */ int overwrite; /* can overwrite itself */ int paused; /* can write into ring buffer */ atomic_t poll; /* POLL_ for wakeups */ local_t head; /* write position */ unsigned int nest; /* nested writers */ local_t events; /* event limit */ local_t wakeup; /* wakeup stamp */ local_t lost; /* nr records lost */ long watermark; /* wakeup watermark */ long aux_watermark; /* poll crap */ spinlock_t event_lock; struct list_head event_list; atomic_t mmap_count; unsigned long mmap_locked; struct user_struct *mmap_user; /* AUX area */ long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; atomic_t aux_mmap_count; unsigned long aux_mmap_locked; void (*free_aux)(void *); refcount_t aux_refcount; int aux_in_sampling; void **aux_pages; void *aux_priv; struct perf_event_mmap_page *user_page; void *data_pages[]; }; extern void rb_free(struct perf_buffer *rb); static inline void rb_free_rcu(struct rcu_head *rcu_head) { struct perf_buffer *rb; rb = container_of(rcu_head, struct perf_buffer, rcu_head); rb_free(rb); } static inline void rb_toggle_paused(struct perf_buffer *rb, bool pause) { if (!pause && rb->nr_pages) rb->paused = 0; else rb->paused = 1; } extern struct perf_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); extern int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, pgoff_t pgoff, int nr_pages, long watermark, int flags); extern void rb_free_aux(struct perf_buffer *rb); extern struct perf_buffer *ring_buffer_get(struct perf_event *event); extern void ring_buffer_put(struct perf_buffer *rb); static inline bool rb_has_aux(struct perf_buffer *rb) { return !!rb->aux_nr_pages; } void perf_event_aux_event(struct perf_event *event, unsigned long head, unsigned long size, u64 flags); extern struct page * perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff); #ifdef CONFIG_PERF_USE_VMALLOC /* * Back perf_mmap() with vmalloc memory. * * Required for architectures that have d-cache aliasing issues. */ static inline int page_order(struct perf_buffer *rb) { return rb->page_order; } #else static inline int page_order(struct perf_buffer *rb) { return 0; } #endif static inline unsigned long perf_data_size(struct perf_buffer *rb) { return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } static inline unsigned long perf_aux_size(struct perf_buffer *rb) { return rb->aux_nr_pages << PAGE_SHIFT; } #define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \ { \ unsigned long size, written; \ \ do { \ size = min(handle->size, len); \ written = memcpy_func(__VA_ARGS__); \ written = size - written; \ \ len -= written; \ handle->addr += written; \ if (advance_buf) \ buf += written; \ handle->size -= written; \ if (!handle->size) { \ struct perf_buffer *rb = handle->rb; \ \ handle->page++; \ handle->page &= rb->nr_pages - 1; \ handle->addr = rb->data_pages[handle->page]; \ handle->size = PAGE_SIZE << page_order(rb); \ } \ } while (len && written == size); \ \ return len; \ } #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ static inline unsigned long \ func_name(struct perf_output_handle *handle, \ const void *buf, unsigned long len) \ __DEFINE_OUTPUT_COPY_BODY(true, memcpy_func, handle->addr, buf, size) static inline unsigned long __output_custom(struct perf_output_handle *handle, perf_copy_f copy_func, const void *buf, unsigned long len) { unsigned long orig_len = len; __DEFINE_OUTPUT_COPY_BODY(false, copy_func, handle->addr, buf, orig_len - len, size) } static inline unsigned long memcpy_common(void *dst, const void *src, unsigned long n) { memcpy(dst, src, n); return 0; } DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) static inline unsigned long memcpy_skip(void *dst, const void *src, unsigned long n) { return 0; } DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip) #ifndef arch_perf_out_copy_user #define arch_perf_out_copy_user arch_perf_out_copy_user static inline unsigned long arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) { unsigned long ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, n); pagefault_enable(); return ret; } #endif DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { unsigned int pc = preempt_count(); unsigned char rctx = 0; rctx += !!(pc & (NMI_MASK)); rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); if (recursion[rctx]) return -1; recursion[rctx]++; barrier(); return rctx; } static inline void put_recursion_context(int *recursion, int rctx) { barrier(); recursion[rctx]--; } #ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP static inline bool arch_perf_have_user_stack_dump(void) { return true; } #define perf_user_stack_pointer(regs) user_stack_pointer(regs) #else static inline bool arch_perf_have_user_stack_dump(void) { return false; } #define perf_user_stack_pointer(regs) 0 #endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ #endif /* _KERNEL_EVENTS_INTERNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 /* SPDX-License-Identifier: GPL-2.0 */ /* Rewritten and vastly simplified by Rusty Russell for in-kernel * module loader: * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ #ifndef _LINUX_KALLSYMS_H #define _LINUX_KALLSYMS_H #include <linux/errno.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/sections.h> #define KSYM_NAME_LEN 128 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr <= (unsigned long)_einittext) return 1; return 0; } static inline int is_kernel_text(unsigned long addr) { if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || arch_is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) return 1; return in_gate_area_no_mm(addr); } static inline int is_ksym_addr(unsigned long addr) { if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) return is_kernel(addr); return is_kernel_text(addr) || is_kernel_inittext(addr); } static inline void *dereference_symbol_descriptor(void *ptr) { #ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); if (is_ksym_addr((unsigned long)ptr)) return ptr; preempt_disable(); mod = __module_address((unsigned long)ptr); preempt_enable(); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); #endif return ptr; } #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); /* Call a function on each kallsyms symbol in the core kernel */ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); /* Lookup an address. modname is set to NULL if it's in the kernel. */ const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ static inline unsigned long kallsyms_lookup_name(const char *name) { return 0; } static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { return 0; } static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { return 0; } static inline const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int sprint_symbol(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int lookup_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { return -ERANGE; } static inline bool kallsyms_show_value(const struct cred *cred) { return false; } #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) { printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCATTERLIST_H #define _LINUX_SCATTERLIST_H #include <linux/string.h> #include <linux/types.h> #include <linux/bug.h> #include <linux/mm.h> #include <asm/io.h> struct scatterlist { unsigned long page_link; unsigned int offset; unsigned int length; dma_addr_t dma_address; #ifdef CONFIG_NEED_SG_DMA_LENGTH unsigned int dma_length; #endif }; /* * Since the above length field is an unsigned int, below we define the maximum * length in bytes that can be stored in one scatterlist entry. */ #define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK) /* * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries dma_map_sg * returns, or alternatively stop on the first sg_dma_len(sg) which * is 0. */ #define sg_dma_address(sg) ((sg)->dma_address) #ifdef CONFIG_NEED_SG_DMA_LENGTH #define sg_dma_len(sg) ((sg)->dma_length) #else #define sg_dma_len(sg) ((sg)->length) #endif struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ unsigned int orig_nents; /* original size of list */ }; /* * Notes on SG table design. * * We use the unsigned long page_link field in the scatterlist struct to place * the page pointer AND encode information about the sg table as well. The two * lower bits are reserved for this information. * * If bit 0 is set, then the page_link contains a pointer to the next sg * table list. Otherwise the next entry is at sg + 1. * * If bit 1 is set, then this sg entry is the last element in a list. * * See sg_next(). * */ #define SG_CHAIN 0x01UL #define SG_END 0x02UL /* * We overload the LSB of the page pointer to indicate whether it's * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! (thanks mason :-) */ #define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) #define sg_is_last(sg) ((sg)->page_link & SG_END) #define sg_chain_ptr(sg) \ ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END))) /** * sg_assign_page - Assign a given page to an SG entry * @sg: SG entry * @page: The page * * Description: * Assign page to sg entry. Also see sg_set_page(), the most commonly used * variant. * **/ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) { unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END); /* * In order for the low bit stealing approach to work, pages * must be aligned at a 32-bit boundary as a minimum. */ BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; } /** * sg_set_page - Set sg entry to point at given page * @sg: SG entry * @page: The page * @len: Length of data * @offset: Offset into page * * Description: * Use this function to set an sg entry pointing at a page, never assign * the page directly. We encode sg table information in the lower bits * of the page pointer. See sg_page() for looking up the page belonging * to an sg entry. * **/ static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { sg_assign_page(sg, page); sg->offset = offset; sg->length = len; } static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); } /** * sg_set_buf - Set sg entry to point at given data * @sg: SG entry * @buf: Data * @buflen: Data length * **/ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { #ifdef CONFIG_DEBUG_SG BUG_ON(!virt_addr_valid(buf)); #endif sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); } /* * Loop over each sg element, following the pointer to a new list if necessary */ #define for_each_sg(sglist, sg, nr, __i) \ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) /* * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. * Please use sg_dma_address(sg) and sg_dma_len(sg) to extract DMA addresses * of the each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) static inline void __sg_chain(struct scatterlist *chain_sg, struct scatterlist *sgl) { /* * offset and length are unused for chain entry. Clear them. */ chain_sg->offset = 0; chain_sg->length = 0; /* * Set lowest bit to indicate a link pointer, and make sure to clear * the termination bit if it happens to be set. */ chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; } /** * sg_chain - Chain two sglists together * @prv: First scatterlist * @prv_nents: Number of entries in prv * @sgl: Second scatterlist * * Description: * Links @prv@ and @sgl@ together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { __sg_chain(&prv[prv_nents - 1], sgl); } /** * sg_mark_end - Mark the end of the scatterlist * @sg: SG entryScatterlist * * Description: * Marks the passed in sg entry as the termination point for the sg * table. A call to sg_next() on this entry will return NULL. * **/ static inline void sg_mark_end(struct scatterlist *sg) { /* * Set termination bit, clear potential chain bit */ sg->page_link |= SG_END; sg->page_link &= ~SG_CHAIN; } /** * sg_unmark_end - Undo setting the end of the scatterlist * @sg: SG entryScatterlist * * Description: * Removes the termination marker from the given entry of the scatterlist. * **/ static inline void sg_unmark_end(struct scatterlist *sg) { sg->page_link &= ~SG_END; } /** * sg_phys - Return physical address of an sg entry * @sg: SG entry * * Description: * This calls page_to_phys() on the page in this sg entry, and adds the * sg offset. The caller must know that it is legal to call page_to_phys() * on the sg page. * **/ static inline dma_addr_t sg_phys(struct scatterlist *sg) { return page_to_phys(sg_page(sg)) + sg->offset; } /** * sg_virt - Return virtual address of an sg entry * @sg: SG entry * * Description: * This calls page_address() on the page in this sg entry, and adds the * sg offset. The caller must know that the sg page has a valid virtual * mapping. * **/ static inline void *sg_virt(struct scatterlist *sg) { return page_address(sg_page(sg)) + sg->offset; } /** * sg_init_marker - Initialize markers in sg table * @sgl: The SG table * @nents: Number of entries in table * **/ static inline void sg_init_marker(struct scatterlist *sgl, unsigned int nents) { sg_mark_end(&sgl[nents - 1]); } int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); void sg_init_one(struct scatterlist *, const void *, unsigned int); int sg_split(struct scatterlist *in, const int in_mapped_nents, const off_t skip, const int nb_splits, const size_t *split_sizes, struct scatterlist **out, int *out_mapped_nents, gfp_t gfp_mask); typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); void __sg_free_table(struct sg_table *, unsigned int, unsigned int, sg_free_fn *); void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, struct scatterlist *prv, unsigned int left_pages, gfp_t gfp_mask); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); #ifdef CONFIG_SGL_ALLOC struct scatterlist *sgl_alloc_order(unsigned long long length, unsigned int order, bool chainable, gfp_t gfp, unsigned int *nent_p); struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, unsigned int *nent_p); void sgl_free_n_order(struct scatterlist *sgl, int nents, int order); void sgl_free_order(struct scatterlist *sgl, int order); void sgl_free(struct scatterlist *sgl); #endif /* CONFIG_SGL_ALLOC */ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen); size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen); size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen, off_t skip); size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip); size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, size_t buflen, off_t skip); /* * Maximum number of entries that will be allocated in one piece, if * a list larger than this is required then chaining will be utilized. */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) /* * The maximum number of SG segments that we will put inside a * scatterlist (unless chaining is used). Should ideally fit inside a * single page, to avoid a higher order allocation. We could define this * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The * minimum value is 32 */ #define SG_CHUNK_SIZE 128 /* * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ #ifdef CONFIG_ARCH_NO_SG_CHAIN #define SG_MAX_SEGMENTS SG_CHUNK_SIZE #else #define SG_MAX_SEGMENTS 2048 #endif #ifdef CONFIG_SG_POOL void sg_free_table_chained(struct sg_table *table, unsigned nents_first_chunk); int sg_alloc_table_chained(struct sg_table *table, int nents, struct scatterlist *first_chunk, unsigned nents_first_chunk); #endif /* * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, you * can call sg_page_iter_page(@piter) to get the current page. * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to * the page's page offset within the sg. The iteration will stop either when a * maximum number of sg entries was reached or a terminating sg * (sg_last(sg) == true) was reached. */ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ unsigned int sg_pgoffset; /* page offset within the sg */ /* these are internal states, keep away */ unsigned int __nents; /* remaining sg entries */ int __pg_advance; /* nr pages to advance at the * next step */ }; /* * sg page iterator for DMA addresses * * This is the same as sg_page_iter however you can call * sg_page_iter_dma_address(@dma_iter) to get the page's DMA * address. sg_page_iter_page() cannot be called on this iterator. */ struct sg_dma_page_iter { struct sg_page_iter base; }; bool __sg_page_iter_next(struct sg_page_iter *piter); bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter); void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset); /** * sg_page_iter_page - get the current page held by the page iterator * @piter: page iterator holding the page */ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) { return nth_page(sg_page(piter->sg), piter->sg_pgoffset); } /** * sg_page_iter_dma_address - get the dma address of the current page held by * the page iterator. * @dma_iter: page iterator holding the page */ static inline dma_addr_t sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) { return sg_dma_address(dma_iter->base.sg) + (dma_iter->base.sg_pgoffset << PAGE_SHIFT); } /** * for_each_sg_page - iterate over the pages of the given sg list * @sglist: sglist to iterate over * @piter: page iterator to hold current page, sg, sg_pgoffset * @nents: maximum number of sg entries to iterate over * @pgoffset: starting page offset (in pages) * * Callers may use sg_page_iter_page() to get each page pointer. * In each loop it operates on PAGE_SIZE unit. */ #define for_each_sg_page(sglist, piter, nents, pgoffset) \ for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ __sg_page_iter_next(piter);) /** * for_each_sg_dma_page - iterate over the pages of the given sg list * @sglist: sglist to iterate over * @dma_iter: DMA page iterator to hold current page * @dma_nents: maximum number of sg entries to iterate over, this is the value * returned from dma_map_sg * @pgoffset: starting page offset (in pages) * * Callers may use sg_page_iter_dma_address() to get each page's DMA address. * In each loop it operates on PAGE_SIZE unit. */ #define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset) \ for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents, \ pgoffset); \ __sg_page_iter_dma_next(dma_iter);) /** * for_each_sgtable_page - iterate over all pages in the sg_table object * @sgt: sg_table object to iterate over * @piter: page iterator to hold current page * @pgoffset: starting page offset (in pages) * * Iterates over the all memory pages in the buffer described by * a scatterlist stored in the given sg_table object. * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object * @sgt: sg_table object to iterate over * @dma_iter: DMA page iterator to hold current page * @pgoffset: starting page offset (in pages) * * Iterates over the all DMA mapped pages in the buffer described by * a scatterlist stored in the given sg_table object. * See also for_each_sg_dma_page(). In each loop it operates on PAGE_SIZE * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* * Mapping sg iterator * * Iterates over sg entries mapping page-by-page. On each successful * iteration, @miter->page points to the mapped page and * @miter->length bytes of data can be accessed at @miter->addr. As * long as an interation is enclosed between start and stop, the user * is free to choose control structure and when to stop. * * @miter->consumed is set to @miter->length on each iteration. It * can be adjusted if the user can't consume all the bytes in one go. * Also, a stopped iteration can be resumed by calling next on it. * This is useful when iteration needs to release all resources and * continue later (e.g. at the next interrupt). */ #define SG_MITER_ATOMIC (1 << 0) /* use kmap_atomic */ #define SG_MITER_TO_SG (1 << 1) /* flush back to phys on unmap */ #define SG_MITER_FROM_SG (1 << 2) /* nop */ struct sg_mapping_iter { /* the following three fields can be accessed directly */ struct page *page; /* currently mapped page */ void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ unsigned int __offset; /* offset within page */ unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, unsigned int nents, unsigned int flags); bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset); bool sg_miter_next(struct sg_mapping_iter *miter); void sg_miter_stop(struct sg_mapping_iter *miter); #endif /* _LINUX_SCATTERLIST_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef LLIST_H #define LLIST_H /* * Lock-less NULL terminated single linked list * * Cases where locking is not needed: * If there are multiple producers and multiple consumers, llist_add can be * used in producers and llist_del_all can be used in consumers simultaneously * without locking. Also a single consumer can use llist_del_first while * multiple producers simultaneously use llist_add, without any locking. * * Cases where locking is needed: * If we have multiple consumers with llist_del_first used in one consumer, and * llist_del_first or llist_del_all used in other consumers, then a lock is * needed. This is because llist_del_first depends on list->first->next not * changing, but without lock protection, there's no way to be sure about that * if a preemption happens in the middle of the delete operation and on being * preempted back, the list->first is the same as before causing the cmpxchg in * llist_del_first to succeed. For example, while a llist_del_first operation * is in progress in one consumer, then a llist_del_first, llist_add, * llist_add (or llist_del_all, llist_add, llist_add) sequence in another * consumer may cause violations. * * This can be summarized as follows: * * | add | del_first | del_all * add | - | - | - * del_first | | L | L * del_all | | | - * * Where, a particular row's operation can happen concurrently with a column's * operation, with "-" being no lock needed, while "L" being lock is needed. * * The list entries deleted via llist_del_all can be traversed with * traversing function such as llist_for_each etc. But the list * entries can not be traversed safely before deleted from the list. * The order of deleted entries is from the newest to the oldest added * one. If you want to traverse from the oldest to the newest, you * must reverse the order by yourself before traversing. * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the * list can NOT be used in NMI handlers. So code that uses the list in * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> */ #include <linux/atomic.h> #include <linux/kernel.h> struct llist_head { struct llist_node *first; }; struct llist_node { struct llist_node *next; }; #define LLIST_HEAD_INIT(name) { NULL } #define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) /** * init_llist_head - initialize lock-less list head * @head: the head for your lock-less list */ static inline void init_llist_head(struct llist_head *list) { list->first = NULL; } /** * llist_entry - get the struct of this entry * @ptr: the &struct llist_node pointer. * @type: the type of the struct this is embedded in. * @member: the name of the llist_node within the struct. */ #define llist_entry(ptr, type, member) \ container_of(ptr, type, member) /** * member_address_is_nonnull - check whether the member address is not NULL * @ptr: the object pointer (struct type * that contains the llist_node) * @member: the name of the llist_node within the struct. * * This macro is conceptually the same as * &ptr->member != NULL * but it works around the fact that compilers can decide that taking a member * address is never a NULL pointer. * * Real objects that start at a high address and have a member at NULL are * unlikely to exist, but such pointers may be returned e.g. by the * container_of() macro. */ #define member_address_is_nonnull(ptr, member) \ ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) /** * llist_for_each - iterate over some deleted entries of a lock-less list * @pos: the &struct llist_node to use as a loop cursor * @node: the first entry of deleted list entries * * In general, some entries of the lock-less list can be traversed * safely only after being deleted from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each(pos, node) \ for ((pos) = (node); pos; (pos) = (pos)->next) /** * llist_for_each_safe - iterate over some deleted entries of a lock-less list * safe against removal of list entry * @pos: the &struct llist_node to use as a loop cursor * @n: another &struct llist_node to use as temporary storage * @node: the first entry of deleted list entries * * In general, some entries of the lock-less list can be traversed * safely only after being deleted from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_safe(pos, n, node) \ for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n)) /** * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type * @pos: the type * to use as a loop cursor. * @node: the fist entry of deleted list entries. * @member: the name of the llist_node with the struct. * * In general, some entries of the lock-less list can be traversed * safely only after being removed from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_entry(pos, node, member) \ for ((pos) = llist_entry((node), typeof(*(pos)), member); \ member_address_is_nonnull(pos, member); \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type * safe against removal of list entry * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @node: the first entry of deleted list entries. * @member: the name of the llist_node with the struct. * * In general, some entries of the lock-less list can be traversed * safely only after being removed from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_entry_safe(pos, n, node, member) \ for (pos = llist_entry((node), typeof(*pos), member); \ member_address_is_nonnull(pos, member) && \ (n = llist_entry(pos->member.next, typeof(*n), member), true); \ pos = n) /** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * * Not guaranteed to be accurate or up to date. Just a quick way to * test whether the list is empty without deleting something from the * list. */ static inline bool llist_empty(const struct llist_head *head) { return READ_ONCE(head->first) == NULL; } static inline struct llist_node *llist_next(struct llist_node *node) { return node->next; } extern bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head); /** * llist_add - add a new entry * @new: new entry to be added * @head: the head for your lock-less list * * Returns true if the list was empty prior to adding this entry. */ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { return llist_add_batch(new, new, head); } /** * llist_del_all - delete all entries from lock-less list * @head: the head of lock-less list to delete all entries * * If list is empty, return NULL, otherwise, delete all entries and * return the pointer to the first entry. The order of entries * deleted is from the newest to the oldest added one. */ static inline struct llist_node *llist_del_all(struct llist_head *head) { return xchg(&head->first, NULL); } extern struct llist_node *llist_del_first(struct llist_head *head); struct llist_node *llist_reverse_order(struct llist_node *head); #endif /* LLIST_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 /* SPDX-License-Identifier: GPL-2.0 */ /* interrupt.h */ #ifndef _LINUX_INTERRUPT_H #define _LINUX_INTERRUPT_H #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> #include <linux/irqreturn.h> #include <linux/irqnr.h> #include <linux/hardirq.h> #include <linux/irqflags.h> #include <linux/hrtimer.h> #include <linux/kref.h> #include <linux/workqueue.h> #include <linux/atomic.h> #include <asm/ptrace.h> #include <asm/irq.h> #include <asm/sections.h> /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When * requesting an interrupt without specifying a IRQF_TRIGGER, the * setting should be assumed to be "as already configured", which * may be as per machine or firmware initialisation. */ #define IRQF_TRIGGER_NONE 0x00000000 #define IRQF_TRIGGER_RISING 0x00000001 #define IRQF_TRIGGER_FALLING 0x00000002 #define IRQF_TRIGGER_HIGH 0x00000004 #define IRQF_TRIGGER_LOW 0x00000008 #define IRQF_TRIGGER_MASK (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | \ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING) #define IRQF_TRIGGER_PROBE 0x00000010 /* * These flags used only by the kernel as part of the * irq handling routines. * * IRQF_SHARED - allow sharing the irq among several devices * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur * IRQF_TIMER - Flag to mark this interrupt as timer interrupt * IRQF_PERCPU - Interrupt is per cpu * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in a shared interrupt is considered for * performance reasons) * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee * that this interrupt will wake the system from a suspended * state. See Documentation/power/suspend-and-interrupts.rst * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 #define __IRQF_TIMER 0x00000200 #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 #define IRQF_ONESHOT 0x00002000 #define IRQF_NO_SUSPEND 0x00004000 #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) /* * These values can be returned by request_any_context_irq() and * describe the context the interrupt will be run in. * * IRQC_IS_HARDIRQ - interrupt runs in hardirq context * IRQC_IS_NESTED - interrupt runs in a nested threaded context */ enum { IRQC_IS_HARDIRQ = 0, IRQC_IS_NESTED, }; typedef irqreturn_t (*irq_handler_t)(int, void *); /** * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @name: name of the device * @dev_id: cookie to identify the device * @percpu_dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts * @irq: interrupt number * @flags: flags (see IRQF_* above) * @thread_fn: interrupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts * @secondary: pointer to secondary irqaction (force threading) * @thread_flags: flags related to @thread * @thread_mask: bitmask for keeping track of @thread activity * @dir: pointer to the proc/irq/NN/name entry */ struct irqaction { irq_handler_t handler; void *dev_id; void __percpu *percpu_dev_id; struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; struct irqaction *secondary; unsigned int irq; unsigned int flags; unsigned long thread_flags; unsigned long thread_mask; const char *name; struct proc_dir_entry *dir; } ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we * can distingiush that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. */ #define IRQ_NOTCONNECTED (1U << 31) extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags, const char *name, void *dev); /** * request_irq - Add a handler for an interrupt line * @irq: The interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts * If NULL, the default primary handler is installed * @flags: Handling flags * @name: Name of the device generating this interrupt * @dev: A cookie passed to the handler function * * This call allocates an interrupt and establishes a handler; see * the documentation for request_threaded_irq() for details. */ static inline int __must_check request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev) { return request_threaded_irq(irq, handler, NULL, flags, name, dev); } extern int __must_check request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev); static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { return __request_percpu_irq(irq, handler, 0, devname, percpu_dev_id); } extern int __must_check request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *dev); extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); extern const void *free_nmi(unsigned int irq, void *dev_id); extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); struct device; extern int __must_check devm_request_threaded_irq(struct device *dev, unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, const char *devname, void *dev_id); static inline int __must_check devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, devname, dev_id); } extern int __must_check devm_request_any_context_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); /* * On lockdep we dont want to enable hardirqs in hardirq * context. Use local_irq_enable_in_hardirq() to annotate * kernel code that has to do this nevertheless (pretty much * the only valid case is for old/broken hardware that is * insanely slow). * * NOTE: in theory this might break fragile code that relies * on hardirq delivery - in practice we dont seem to have such * places left. So the only effect should be slightly increased * irqs-off latencies. */ #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else # define local_irq_enable_in_hardirq() local_irq_enable() #endif extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); extern void enable_percpu_nmi(unsigned int irq, unsigned int type); extern int prepare_percpu_nmi(unsigned int irq); extern void teardown_percpu_nmi(unsigned int irq); extern int irq_inject_interrupt(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); extern void rearm_wake_irq(unsigned int irq); /** * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. * @release: Function to be called on release. This will be * called in process context. Once registered, the * structure must only be freed when this function is * called or later. */ struct irq_affinity_notify { unsigned int irq; struct kref kref; struct work_struct work; void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; #define IRQ_AFFINITY_MAX_SETS 4 /** * struct irq_affinity - Description for automatic irq affinity assignements * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space * @nr_sets: The number of interrupt sets for which affinity * spreading is required * @set_size: Array holding the size of each interrupt set * @calc_sets: Callback for calculating the number and size * of interrupt sets * @priv: Private data for usage by @calc_sets, usually a * pointer to driver/device specific data. */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); void *priv; }; /** * struct irq_affinity_desc - Interrupt affinity descriptor * @mask: cpumask to hold the affinity assignment * @is_managed: 1 if the interrupt is managed internally */ struct irq_affinity_desc { struct cpumask mask; unsigned int is_managed : 1; }; #if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; /* Internal implementation. Use the helpers below */ extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, bool force); /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity * @cpumask: cpumask * * Fails if cpumask does not contain an online CPU */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { return __irq_set_affinity(irq, cpumask, false); } /** * irq_force_affinity - Force the irq affinity of a given irq * @irq: Interrupt to set affinity * @cpumask: cpumask * * Same as irq_set_affinity, but without checking the mask against * online cpus. * * Solely for low level cpu hotplug code, where we need to make per * cpu interrupts affine before the cpu becomes online. */ static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) { return __irq_set_affinity(irq, cpumask, true); } extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd); unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) { return 0; } static inline int irq_can_set_affinity(unsigned int irq) { return 0; } static inline int irq_select_affinity(unsigned int irq) { return 0; } static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { return 0; } static inline struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd) { return NULL; } static inline unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd) { return maxvec; } #endif /* CONFIG_SMP */ /* * Special lockdep variants of irq disabling/enabling. * These should be used for locking constructs that * know that a particular irq context which is disabled, * and which is the only irq-context user of a lock, * that it's safe to take the lock in the irq-disabled * section without disabling hardirqs. * * On !CONFIG_LOCKDEP they are equivalent to the normal * irq disable/enable methods. */ static inline void disable_irq_nosync_lockdep(unsigned int irq) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_save(*flags); #endif } static inline void disable_irq_lockdep(unsigned int irq) { disable_irq(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void enable_irq_lockdep(unsigned int irq) { #ifdef CONFIG_LOCKDEP local_irq_enable(); #endif enable_irq(irq); } static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) { #ifdef CONFIG_LOCKDEP local_irq_restore(*flags); #endif enable_irq(irq); } /* IRQ wakeup (PM) control: */ extern int irq_set_irq_wake(unsigned int irq, unsigned int on); static inline int enable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 1); } static inline int disable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 0); } /* * irq_get_irqchip_state/irq_set_irqchip_state specific flags */ enum irqchip_irq_state { IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? */ }; extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state); extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool state); #ifdef CONFIG_IRQ_FORCED_THREADING # ifdef CONFIG_PREEMPT_RT # define force_irqthreads (true) # else extern bool force_irqthreads; # endif #else #define force_irqthreads (0) #endif #ifndef local_softirq_pending #ifndef local_softirq_pending_ref #define local_softirq_pending_ref irq_stat.__softirq_pending #endif #define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref)) #define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x))) #define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x))) #endif /* local_softirq_pending */ /* Some architectures might implement lazy enabling/disabling of * interrupts. In some cases, such as stop_machine, we might want * to ensure that after a local_irq_disable(), interrupts have * really been disabled in hardware. Such architectures need to * implement the following hook. */ #ifndef hard_irq_disable #define hard_irq_disable() do { } while(0) #endif /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes tasklets are more than enough. F.e. all serial device BHs et al. should be converted to tasklets, not to softirqs. */ enum { HI_SOFTIRQ=0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; #define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. */ extern const char * const softirq_to_name[NR_SOFTIRQS]; /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ struct softirq_action { void (*action)(struct softirq_action *); }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); #ifdef __ARCH_HAS_DO_SOFTIRQ void do_softirq_own_stack(void); #else static inline void do_softirq_own_stack(void) { __do_softirq(); } #endif extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) { return this_cpu_read(ksoftirqd); } /* Tasklets --- multithreaded analogue of BHs. This API is deprecated. Please consider using threaded IRQs instead: https://lore.kernel.org/lkml/20200716081538.2sivhkj4hcyrusem@linutronix.de Main feature differing them of generic softirqs: tasklet is running only on one CPU simultaneously. Main feature differing them of BHs: different tasklets may be run simultaneously on different CPUs. Properties: * If tasklet_schedule() is called, then tasklet is guaranteed to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. * If this tasklet is already running on another CPU (or schedule is called from tasklet itself), it is rescheduled for later. * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. */ struct tasklet_struct { struct tasklet_struct *next; unsigned long state; atomic_t count; bool use_callback; union { void (*func)(unsigned long data); void (*callback)(struct tasklet_struct *t); }; unsigned long data; }; #define DECLARE_TASKLET(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .callback = _callback, \ .use_callback = true, \ } #define DECLARE_TASKLET_DISABLED(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .callback = _callback, \ .use_callback = true, \ } #define from_tasklet(var, callback_tasklet, tasklet_fieldname) \ container_of(callback_tasklet, typeof(*var), tasklet_fieldname) #define DECLARE_TASKLET_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .func = _func, \ } #define DECLARE_TASKLET_DISABLED_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .func = _func, \ } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; #ifdef CONFIG_SMP static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } static inline void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_atomic(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } static inline void tasklet_unlock_wait(struct tasklet_struct *t) { while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } } #else #define tasklet_trylock(t) 1 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_schedule(t); } extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_hi_schedule(t); } static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); smp_mb__after_atomic(); } static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); tasklet_unlock_wait(t); smp_mb(); } static inline void tasklet_enable(struct tasklet_struct *t) { smp_mb__before_atomic(); atomic_dec(&t->count); } extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, void (*callback)(struct tasklet_struct *)); /* * Autoprobing for irqs: * * probe_irq_on() and probe_irq_off() provide robust primitives * for accurate IRQ probing during kernel initialization. They are * reasonably simple to use, are not "fooled" by spurious interrupts, * and, unlike other attempts at IRQ probing, they do not get hung on * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). * * For reasonably foolproof probing, use them as follows: * * 1. clear and/or mask the device's internal interrupt. * 2. sti(); * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs * 4. enable the device and cause it to trigger an interrupt. * 5. wait for the device to interrupt, using non-intrusive polling or a delay. * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple * 7. service the device to clear its pending interrupt. * 8. loop again if paranoia is required. * * probe_irq_on() returns a mask of allocated irq's. * * probe_irq_off() takes the mask as a parameter, * and returns the irq number which occurred, * or zero if none occurred, or a negative irq number * if more than one irq occurred. */ #if !defined(CONFIG_GENERIC_IRQ_PROBE) static inline unsigned long probe_irq_on(void) { return 0; } static inline int probe_irq_off(unsigned long val) { return 0; } static inline unsigned int probe_irq_mask(unsigned long val) { return 0; } #else extern unsigned long probe_irq_on(void); /* returns 0 on failure */ extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ #endif #ifdef CONFIG_PROC_FS /* Initialize /proc/irq/ */ extern void init_irq_proc(void); #else static inline void init_irq_proc(void) { } #endif #ifdef CONFIG_IRQ_TIMINGS void irq_timings_enable(void); void irq_timings_disable(void); u64 irq_timings_next_event(u64 now); #endif struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ #ifndef __irq_entry # define __irq_entry __section(".irqentry.text") #endif #define __softirq_entry __section(".softirqentry.text") #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H #include <net/ip_tunnels.h> #include <net/udp.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ipv6_stubs.h> #endif struct udp_port_cfg { u8 family; /* Used only for kernel-created sockets */ union { struct in_addr local_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr local_ip6; #endif }; union { struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr peer_ip6; #endif }; __be16 local_udp_port; __be16 peer_udp_port; int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; }; int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #if IS_ENABLED(CONFIG_IPV6) int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #else static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { return 0; } #endif static inline int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { if (cfg->family == AF_INET) return udp_sock_create4(net, cfg, sockp); if (cfg->family == AF_INET6) return udp_sock_create6(net, cfg, sockp); return -EPFNOSUPPORT; } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, struct sk_buff *skb); typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* -- List of parsable UDP tunnel types -- * * Adding to this list will result in serious debate. The main issue is * that this list is essentially a list of workarounds for either poorly * designed tunnels, or poorly designed device offloads. * * The parsing supported via these types should really be used for Rx * traffic only as the network stack will have already inserted offsets for * the location of the headers in the skb. In addition any ports that are * pushed should be kept within the namespace without leaking to other * devices such as VFs or other ports on the same device. * * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the * need to use this for Rx checksum offload. It should not be necessary to * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; u8 hw_priv; }; /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); static inline void udp_tunnel_get_rx_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } static inline void udp_tunnel_drop_rx_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); } /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } #endif static inline void udp_tunnel_encap_enable(struct socket *sock) { struct udp_sock *up = udp_sk(sock->sk); if (up->encap_enabled) return; up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); else #endif udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { /* Device callbacks may sleep */ UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), /* Device supports only IPv4 tunnels */ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; struct udp_tunnel_nic; #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) struct udp_tunnel_nic_shared { struct udp_tunnel_nic *udp_tunnel_nic_info; struct list_head devices; }; struct udp_tunnel_nic_shared_node { struct net_device *dev; struct list_head list; }; /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table * @tables.tunnel_types: types of tunnels this table accepts * * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * * Devices which (misguidedly) share the UDP tunnel port table across multiple * netdevs should allocate an instance of struct udp_tunnel_nic_shared and * point @shared at it. * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices * sharing a table. * * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, * while not being a tunneled skb, and tunnel skbs from other ports - * drivers should only use these ports for non-critical RX-side offloads, * e.g. the checksum offload; * - none of the devices care about the socket family at present, so we don't * track it. Please extend this code if you care. */ struct udp_tunnel_nic_info { /* one-by-one */ int (*set_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); int (*unset_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); struct udp_tunnel_nic_shared *shared; unsigned int flags; struct udp_tunnel_nic_table_info { unsigned int n_entries; unsigned int tunnel_types; } tables[UDP_TUNNEL_NIC_MAX_TABLES]; }; /* UDP tunnel module dependencies * * Tunnel drivers are expected to have a hard dependency on the udp_tunnel * module. NIC drivers are not, they just attach their * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. * Loading a tunnel driver will cause the udp_tunnel module to be loaded * and only then will all the required state structures be allocated. * Since we want a weak dependency from the drivers and the core to udp_tunnel * we call things through the following stubs. */ struct udp_tunnel_nic_ops { void (*get_port)(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); void (*set_port_priv)(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv); void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*reset_ntf)(struct net_device *dev); size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); }; #ifdef CONFIG_INET extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; #else #define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) #endif static inline void udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { /* This helper is used from .sync_table, we indicate empty entries * by zero'ed @ti. Drivers which need to know the details of a port * when it gets deleted should use the .set_port / .unset_port * callbacks. * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. */ memset(ti, 0, sizeof(*ti)); if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->get_port(dev, table, idx, ti); } static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); } static inline void udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->add_port(dev, ti); } static inline void udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->del_port(dev, ti); } /** * udp_tunnel_nic_reset_ntf() - device-originating reset notification * @dev: network interface device structure * * Called by the driver to inform the core that the entire UDP tunnel port * state has been lost, usually due to device reset. Core will assume device * forgot all the ports and issue .set_port and .sync_table callbacks as * necessary. * * This function must be called with rtnl lock held, and will issue all * the callbacks before returning. */ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->reset_ntf(dev); } static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_size(dev, table); } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_write(dev, table, skb); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 /* SPDX-License-Identifier: GPL-2.0+ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rseq #if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RSEQ_H #include <linux/tracepoint.h> #include <linux/types.h> TRACE_EVENT(rseq_update, TP_PROTO(struct task_struct *t), TP_ARGS(t), TP_STRUCT__entry( __field(s32, cpu_id) ), TP_fast_assign( __entry->cpu_id = raw_smp_processor_id(); ), TP_printk("cpu_id=%d", __entry->cpu_id) ); TRACE_EVENT(rseq_ip_fixup, TP_PROTO(unsigned long regs_ip, unsigned long start_ip, unsigned long post_commit_offset, unsigned long abort_ip), TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip), TP_STRUCT__entry( __field(unsigned long, regs_ip) __field(unsigned long, start_ip) __field(unsigned long, post_commit_offset) __field(unsigned long, abort_ip) ), TP_fast_assign( __entry->regs_ip = regs_ip; __entry->start_ip = start_ip; __entry->post_commit_offset = post_commit_offset; __entry->abort_ip = abort_ip; ), TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx", __entry->regs_ip, __entry->start_ip, __entry->post_commit_offset, __entry->abort_ip) ); #endif /* _TRACE_SOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Cryptographic scatter and gather helpers. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2002 Adam J. Richter <adam@yggdrasil.com> * Copyright (c) 2004 Jean-Luc Cooke <jlcooke@certainkey.com> * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_SCATTERWALK_H #define _CRYPTO_SCATTERWALK_H #include <crypto/algapi.h> #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/scatterlist.h> static inline void scatterwalk_crypto_chain(struct scatterlist *head, struct scatterlist *sg, int num) { if (sg) sg_chain(head, num, sg); else sg_mark_end(head); } static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk) { unsigned int len = walk->sg->offset + walk->sg->length - walk->offset; unsigned int len_this_page = offset_in_page(~walk->offset) + 1; return len_this_page > len ? len : len_this_page; } static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk, unsigned int nbytes) { unsigned int len_this_page = scatterwalk_pagelen(walk); return nbytes > len_this_page ? len_this_page : nbytes; } static inline void scatterwalk_advance(struct scatter_walk *walk, unsigned int nbytes) { walk->offset += nbytes; } static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, unsigned int alignmask) { return !(walk->offset & alignmask); } static inline struct page *scatterwalk_page(struct scatter_walk *walk) { return sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); } static inline void scatterwalk_unmap(void *vaddr) { kunmap_atomic(vaddr); } static inline void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg) { walk->sg = sg; walk->offset = sg->offset; } static inline void *scatterwalk_map(struct scatter_walk *walk) { return kmap_atomic(scatterwalk_page(walk)) + offset_in_page(walk->offset); } static inline void scatterwalk_pagedone(struct scatter_walk *walk, int out, unsigned int more) { if (out) { struct page *page; page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); /* Test ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE first as * PageSlab cannot be optimised away per se due to * use of volatile pointer. */ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE && !PageSlab(page)) flush_dcache_page(page); } if (more && walk->offset >= walk->sg->offset + walk->sg->length) scatterwalk_start(walk, sg_next(walk->sg)); } static inline void scatterwalk_done(struct scatter_walk *walk, int out, int more) { if (!more || walk->offset >= walk->sg->offset + walk->sg->length || !(walk->offset & (PAGE_SIZE - 1))) scatterwalk_pagedone(walk, out, more); } void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); void *scatterwalk_map(struct scatter_walk *walk); void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, unsigned int start, unsigned int nbytes, int out); struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len); #endif /* _CRYPTO_SCATTERWALK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 #undef TRACE_SYSTEM #define TRACE_SYSTEM neigh #if !defined(_TRACE_NEIGH_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NEIGH_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/tracepoint.h> #include <net/neighbour.h> #define neigh_state_str(state) \ __print_symbolic(state, \ { NUD_INCOMPLETE, "incomplete" }, \ { NUD_REACHABLE, "reachable" }, \ { NUD_STALE, "stale" }, \ { NUD_DELAY, "delay" }, \ { NUD_PROBE, "probe" }, \ { NUD_FAILED, "failed" }, \ { NUD_NOARP, "noarp" }, \ { NUD_PERMANENT, "permanent"}) TRACE_EVENT(neigh_create, TP_PROTO(struct neigh_table *tbl, struct net_device *dev, const void *pkey, const struct neighbour *n, bool exempt_from_gc), TP_ARGS(tbl, dev, pkey, n, exempt_from_gc), TP_STRUCT__entry( __field(u32, family) __dynamic_array(char, dev, IFNAMSIZ ) __field(int, entries) __field(u8, created) __field(u8, gc_exempt) __array(u8, primary_key4, 4) __array(u8, primary_key6, 16) ), TP_fast_assign( struct in6_addr *pin6; __be32 *p32; __entry->family = tbl->family; __assign_str(dev, (dev ? dev->name : "NULL")); __entry->entries = atomic_read(&tbl->gc_entries); __entry->created = n != NULL; __entry->gc_exempt = exempt_from_gc; pin6 = (struct in6_addr *)__entry->primary_key6; p32 = (__be32 *)__entry->primary_key4; if (tbl->family == AF_INET) *p32 = *(__be32 *)pkey; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (tbl->family == AF_INET6) { pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)pkey; } #endif ), TP_printk("family %d dev %s entries %d primary_key4 %pI4 primary_key6 %pI6c created %d gc_exempt %d", __entry->family, __get_str(dev), __entry->entries, __entry->primary_key4, __entry->primary_key6, __entry->created, __entry->gc_exempt) ); TRACE_EVENT(neigh_update, TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid), TP_ARGS(n, lladdr, new, flags, nlmsg_pid), TP_STRUCT__entry( __field(u32, family) __string(dev, (n->dev ? n->dev->name : "NULL")) __array(u8, lladdr, MAX_ADDR_LEN) __field(u8, lladdr_len) __field(u8, flags) __field(u8, nud_state) __field(u8, type) __field(u8, dead) __field(int, refcnt) __array(__u8, primary_key4, 4) __array(__u8, primary_key6, 16) __field(unsigned long, confirmed) __field(unsigned long, updated) __field(unsigned long, used) __array(u8, new_lladdr, MAX_ADDR_LEN) __field(u8, new_state) __field(u32, update_flags) __field(u32, pid) ), TP_fast_assign( int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); struct in6_addr *pin6; __be32 *p32; __entry->family = n->tbl->family; __assign_str(dev, (n->dev ? n->dev->name : "NULL")); __entry->lladdr_len = lladdr_len; memcpy(__entry->lladdr, n->ha, lladdr_len); __entry->flags = n->flags; __entry->nud_state = n->nud_state; __entry->type = n->type; __entry->dead = n->dead; __entry->refcnt = refcount_read(&n->refcnt); pin6 = (struct in6_addr *)__entry->primary_key6; p32 = (__be32 *)__entry->primary_key4; if (n->tbl->family == AF_INET) *p32 = *(__be32 *)n->primary_key; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl->family == AF_INET6) { pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)n->primary_key; } else #endif { ipv6_addr_set_v4mapped(*p32, pin6); } __entry->confirmed = n->confirmed; __entry->updated = n->updated; __entry->used = n->used; if (lladdr) memcpy(__entry->new_lladdr, lladdr, lladdr_len); __entry->new_state = new; __entry->update_flags = flags; __entry->pid = nlmsg_pid; ), TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " "confirmed %lu updated %lu used %lu new_lladdr %s " "new_state %s update_flags %02x pid %d", __entry->family, __get_str(dev), __print_hex_str(__entry->lladdr, __entry->lladdr_len), __entry->flags, neigh_state_str(__entry->nud_state), __entry->type, __entry->dead, __entry->refcnt, __entry->primary_key4, __entry->primary_key6, __entry->confirmed, __entry->updated, __entry->used, __print_hex_str(__entry->new_lladdr, __entry->lladdr_len), neigh_state_str(__entry->new_state), __entry->update_flags, __entry->pid) ); DECLARE_EVENT_CLASS(neigh__update, TP_PROTO(struct neighbour *n, int err), TP_ARGS(n, err), TP_STRUCT__entry( __field(u32, family) __string(dev, (n->dev ? n->dev->name : "NULL")) __array(u8, lladdr, MAX_ADDR_LEN) __field(u8, lladdr_len) __field(u8, flags) __field(u8, nud_state) __field(u8, type) __field(u8, dead) __field(int, refcnt) __array(__u8, primary_key4, 4) __array(__u8, primary_key6, 16) __field(unsigned long, confirmed) __field(unsigned long, updated) __field(unsigned long, used) __field(u32, err) ), TP_fast_assign( int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); struct in6_addr *pin6; __be32 *p32; __entry->family = n->tbl->family; __assign_str(dev, (n->dev ? n->dev->name : "NULL")); __entry->lladdr_len = lladdr_len; memcpy(__entry->lladdr, n->ha, lladdr_len); __entry->flags = n->flags; __entry->nud_state = n->nud_state; __entry->type = n->type; __entry->dead = n->dead; __entry->refcnt = refcount_read(&n->refcnt); pin6 = (struct in6_addr *)__entry->primary_key6; p32 = (__be32 *)__entry->primary_key4; if (n->tbl->family == AF_INET) *p32 = *(__be32 *)n->primary_key; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl->family == AF_INET6) { pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)n->primary_key; } else #endif { ipv6_addr_set_v4mapped(*p32, pin6); } __entry->confirmed = n->confirmed; __entry->updated = n->updated; __entry->used = n->used; __entry->err = err; ), TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " "confirmed %lu updated %lu used %lu err %d", __entry->family, __get_str(dev), __print_hex_str(__entry->lladdr, __entry->lladdr_len), __entry->flags, neigh_state_str(__entry->nud_state), __entry->type, __entry->dead, __entry->refcnt, __entry->primary_key4, __entry->primary_key6, __entry->confirmed, __entry->updated, __entry->used, __entry->err) ); DEFINE_EVENT(neigh__update, neigh_update_done, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_timer_handler, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_event_send_done, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_event_send_dead, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_cleanup_and_release, TP_PROTO(struct neighbour *neigh, int rc), TP_ARGS(neigh, rc) ); #endif /* _TRACE_NEIGH_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 #ifndef _LINUX_GENERIC_RADIX_TREE_H #define _LINUX_GENERIC_RADIX_TREE_H /** * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to * PAGE_SIZE. * * A genradix is defined with the type it will store, like so: * * static GENRADIX(struct foo) foo_genradix; * * The main operations are: * * - genradix_init(radix) - initialize an empty genradix * * - genradix_free(radix) - free all memory owned by the genradix and * reinitialize it * * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning * NULL if that entry does not exist * * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry, * allocating it if necessary * * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix * * The radix tree allocates one page of entries at a time, so entries may exist * that were never explicitly allocated - they will be initialized to all * zeroes. * * Internally, a genradix is just a radix tree of pages, and indexing works in * terms of byte offsets. The wrappers in this header file use sizeof on the * type the radix contains to calculate a byte offset from the index - see * __idx_to_offset. */ #include <asm/page.h> #include <linux/bug.h> #include <linux/kernel.h> #include <linux/log2.h> struct genradix_root; struct __genradix { struct genradix_root *root; }; /* * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: */ #define __GENRADIX_INITIALIZER \ { \ .tree = { \ .root = NULL, \ } \ } /* * We use a 0 size array to stash the type we're storing without taking any * space at runtime - then the various accessor macros can use typeof() to get * to it for casts/sizeof - we also force the alignment so that storing a type * with a ridiculous alignment doesn't blow up the alignment or size of the * genradix. */ #define GENRADIX(_type) \ struct { \ struct __genradix tree; \ _type type[0] __aligned(1); \ } #define DEFINE_GENRADIX(_name, _type) \ GENRADIX(_type) _name = __GENRADIX_INITIALIZER /** * genradix_init - initialize a genradix * @_radix: genradix to initialize * * Does not fail */ #define genradix_init(_radix) \ do { \ *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \ } while (0) void __genradix_free(struct __genradix *); /** * genradix_free: free all memory owned by a genradix * @_radix: the genradix to free * * After freeing, @_radix will be reinitialized and empty */ #define genradix_free(_radix) __genradix_free(&(_radix)->tree) static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) BUILD_BUG_ON(obj_size > PAGE_SIZE); else BUG_ON(obj_size > PAGE_SIZE); if (!is_power_of_2(obj_size)) { size_t objs_per_page = PAGE_SIZE / obj_size; return (idx / objs_per_page) * PAGE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; } } #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) void *__genradix_ptr(struct __genradix *, size_t); /** * genradix_ptr - get a pointer to a genradix entry * @_radix: genradix to access * @_idx: index to fetch * * Returns a pointer to entry at @_idx, or NULL if that entry does not exist. */ #define genradix_ptr(_radix, _idx) \ (__genradix_cast(_radix) \ __genradix_ptr(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it * if necessary * @_radix: genradix to access * @_idx: index to fetch * @_gfp: gfp mask * * Returns a pointer to entry at @_idx, or NULL on allocation failure */ #define genradix_ptr_alloc(_radix, _idx, _gfp) \ (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ _gfp)) struct genradix_iter { size_t offset; size_t pos; }; /** * genradix_iter_init - initialize a genradix_iter * @_radix: genradix that will be iterated over * @_idx: index to start iterating from */ #define genradix_iter_init(_radix, _idx) \ ((struct genradix_iter) { \ .pos = (_idx), \ .offset = __genradix_idx_to_offset((_radix), (_idx)),\ }) void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); /** * genradix_iter_peek - get first entry at or above iterator's current * position * @_iter: a genradix_iter * @_radix: genradix being iterated over * * If no more entries exist at or above @_iter's current position, returns NULL */ #define genradix_iter_peek(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek(_iter, &(_radix)->tree, \ PAGE_SIZE / __genradix_obj_size(_radix))) static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { iter->offset += obj_size; if (!is_power_of_2(obj_size) && (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) iter->offset = round_up(iter->offset, PAGE_SIZE); iter->pos++; } #define genradix_iter_advance(_iter, _radix) \ __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) #define genradix_for_each_from(_radix, _iter, _p, _start) \ for (_iter = genradix_iter_init(_radix, _start); \ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ genradix_iter_advance(&_iter, _radix)) /** * genradix_for_each - iterate over entry in a genradix * @_radix: genradix to iterate over * @_iter: a genradix_iter to track current position * @_p: pointer to genradix entry type * * On every iteration, @_p will point to the current entry, and @_iter.pos * will be the current entry's index. */ #define genradix_for_each(_radix, _iter, _p) \ genradix_for_each_from(_radix, _iter, _p, 0) int __genradix_prealloc(struct __genradix *, size_t, gfp_t); /** * genradix_prealloc - preallocate entries in a generic radix tree * @_radix: genradix to preallocate * @_nr: number of entries to preallocate * @_gfp: gfp mask * * Returns 0 on success, -ENOMEM on failure */ #define genradix_prealloc(_radix, _nr, _gfp) \ __genradix_prealloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _nr + 1),\ _gfp) #endif /* _LINUX_GENERIC_RADIX_TREE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM alarmtimer #if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_ALARMTIMER_H #include <linux/alarmtimer.h> #include <linux/rtc.h> #include <linux/tracepoint.h> TRACE_DEFINE_ENUM(ALARM_REALTIME); TRACE_DEFINE_ENUM(ALARM_BOOTTIME); TRACE_DEFINE_ENUM(ALARM_REALTIME_FREEZER); TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER); #define show_alarm_type(type) __print_flags(type, " | ", \ { 1 << ALARM_REALTIME, "REALTIME" }, \ { 1 << ALARM_BOOTTIME, "BOOTTIME" }, \ { 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" }, \ { 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" }) TRACE_EVENT(alarmtimer_suspend, TP_PROTO(ktime_t expires, int flag), TP_ARGS(expires, flag), TP_STRUCT__entry( __field(s64, expires) __field(unsigned char, alarm_type) ), TP_fast_assign( __entry->expires = expires; __entry->alarm_type = flag; ), TP_printk("alarmtimer type:%s expires:%llu", show_alarm_type((1 << __entry->alarm_type)), __entry->expires ) ); DECLARE_EVENT_CLASS(alarm_class, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now), TP_STRUCT__entry( __field(void *, alarm) __field(unsigned char, alarm_type) __field(s64, expires) __field(s64, now) ), TP_fast_assign( __entry->alarm = alarm; __entry->alarm_type = alarm->type; __entry->expires = alarm->node.expires; __entry->now = now; ), TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu", __entry->alarm, show_alarm_type((1 << __entry->alarm_type)), __entry->expires, __entry->now ) ); DEFINE_EVENT(alarm_class, alarmtimer_fired, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); DEFINE_EVENT(alarm_class, alarmtimer_start, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); DEFINE_EVENT(alarm_class, alarmtimer_cancel, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); #endif /* _TRACE_ALARMTIMER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KERNEL_H #define _LINUX_KERNEL_H #include <stdarg.h> #include <linux/limits.h> #include <linux/linkage.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/bitops.h> #include <linux/log2.h> #include <linux/minmax.h> #include <linux/typecheck.h> #include <linux/printk.h> #include <linux/build_bug.h> #include <asm/byteorder.h> #include <asm/div64.h> #include <uapi/linux/kernel.h> #define STACK_MAGIC 0xdeadbeef /** * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value * @x: value to repeat * * NOTE: @x is not checked for > 0xff; larger values produce odd results. */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) /* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) /* generic data direction definitions */ #define READ 0 #define WRITE 1 /** * ARRAY_SIZE - get the number of elements in array @arr * @arr: array to be sized */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ { \ typecheck(u64, (x)); \ (void __user *)(uintptr_t)(x); \ } \ ) /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be * as wide as the result!), and we want to evaluate the macro * arguments just once each. */ #define __round_mask(x, y) ((__typeof__(x))((y)-1)) /** * round_up - round up to next specified power of 2 * @x: the value to round * @y: multiple to round up to (must be a power of 2) * * Rounds @x up to next multiple of @y (which must be a power of 2). * To perform arbitrary rounding up, use roundup() below. */ #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) /** * round_down - round down to next specified power of 2 * @x: the value to round * @y: multiple to round down to (must be a power of 2) * * Rounds @x down to next multiple of @y (which must be a power of 2). * To perform arbitrary rounding down, use rounddown() below. */ #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define typeof_member(T, m) typeof(((T*)0)->m) #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) #define DIV_ROUND_UP_ULL(ll, d) \ DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) #else # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) #endif /** * roundup - round up to the next specified multiple * @x: the value to up * @y: multiple to round up to * * Rounds @x up to next multiple of @y. If @y will always be a power * of 2, consider using the faster round_up(). */ #define roundup(x, y) ( \ { \ typeof(y) __y = y; \ (((x) + (__y - 1)) / __y) * __y; \ } \ ) /** * rounddown - round down to next specified multiple * @x: the value to round * @y: multiple to round down to * * Rounds @x down to next multiple of @y. If @y will always be a power * of 2, consider using the faster round_down(). */ #define rounddown(x, y) ( \ { \ typeof(x) __x = (x); \ __x - (__x % (y)); \ } \ ) /* * Divide positive or negative dividend by positive or negative divisor * and round to closest integer. Result is undefined for negative * divisors if the dividend variable type is unsigned and for negative * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ (((typeof(x))-1) > 0 || \ ((typeof(divisor))-1) > 0 || \ (((__x) > 0) == ((__d) > 0))) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ ) /* * Same as above but for u64 dividends. divisor must be a 32-bit * number. */ #define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ { \ typeof(divisor) __d = divisor; \ unsigned long long _tmp = (x) + (__d) / 2; \ do_div(_tmp, __d); \ _tmp; \ } \ ) /* * Multiplies an integer by a fraction, while avoiding unnecessary * overflow or loss of precision. */ #define mult_frac(x, numer, denom)( \ { \ typeof(x) quot = (x) / (denom); \ typeof(x) rem = (x) % (denom); \ (quot * (numer)) + ((rem * (numer)) / (denom)); \ } \ ) #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) #define sector_div(a, b) do_div(a, b) /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing * * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress * the "right shift count >= width of type" warning when that quantity is * 32-bits. */ #define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) /** * lower_32_bits - return bits 0-31 of a number * @n: the number we're accessing */ #define lower_32_bits(n) ((u32)((n) & 0xffffffff)) struct completion; struct pt_regs; struct user; #ifdef CONFIG_PREEMPT_VOLUNTARY extern int _cond_resched(void); # define might_resched() _cond_resched() #else # define might_resched() do { } while (0) #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset); extern void __cant_sleep(const char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep * * this macro will print a stack trace if it is executed in an atomic * context (spinlock, irq-handler, ...). Additional sections where blocking is * not allowed can be annotated with non_block_start() and non_block_end() * pairs. * * This is a useful debugging help to be able to catch problems early and not * be bitten later when the calling function happens to sleep when it is not * supposed to. */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) /** * cant_sleep - annotation for functions that cannot sleep * * this macro will print a stack trace if it is executed with preemption enabled */ # define cant_sleep() \ do { __cant_sleep(__FILE__, __LINE__, 0); } while (0) # define sched_annotate_sleep() (current->task_state_change = 0) /** * non_block_start - annotate the start of section where sleeping is prohibited * * This is on behalf of the oom reaper, specifically when it is calling the mmu * notifiers. The problem is that if the notifier were to block on, for example, * mutex_lock() and if the process which holds that mutex were to perform a * sleeping memory allocation, the oom reaper is now blocked on completion of * that memory allocation. Other blocking calls like wait_event() pose similar * issues. */ # define non_block_start() (current->non_block_count++) /** * non_block_end - annotate the end of section where sleeping is prohibited * * Closes a section opened by non_block_start(). */ # define non_block_end() WARN_ON(current->non_block_count-- == 0) #else static inline void ___might_sleep(const char *file, int line, int preempt_offset) { } static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) # define sched_annotate_sleep() do { } while (0) # define non_block_start() do { } while (0) # define non_block_end() do { } while (0) #endif #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) #ifndef CONFIG_PREEMPT_RT # define cant_migrate() cant_sleep() #else /* Placeholder for now */ # define cant_migrate() do { } while (0) #endif /** * abs - return absolute value of an argument * @x: the value. If it is unsigned type, it is converted to signed type first. * char is treated as if it was signed (regardless of whether it really is) * but the macro's return type is preserved as char. * * Return: an absolute value of x. */ #define abs(x) __abs_choose_expr(x, long long, \ __abs_choose_expr(x, long, \ __abs_choose_expr(x, int, \ __abs_choose_expr(x, short, \ __abs_choose_expr(x, char, \ __builtin_choose_expr( \ __builtin_types_compatible_p(typeof(x), char), \ (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ ((void)0))))))) #define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ __builtin_types_compatible_p(typeof(x), signed type) || \ __builtin_types_compatible_p(typeof(x), unsigned type), \ ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value * @ep_ro: right open interval endpoint * * Perform a "reciprocal multiplication" in order to "scale" a value into * range [0, @ep_ro), where the upper interval endpoint is right-open. * This is useful, e.g. for accessing a index of an array containing * @ep_ro elements, for example. Think of it as sort of modulus, only that * the result isn't that of modulo. ;) Note that if initial input is a * small value, then result will return 0. * * Return: a result based on @val in interval [0, @ep_ro). */ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) { return (u32)(((u64) val * ep_ro) >> 32); } #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) #define might_fault() __might_fault(__FILE__, __LINE__) void __might_fault(const char *file, int line); #else static inline void might_fault(void) { } #endif extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); int __must_check kstrtoll(const char *s, unsigned int base, long long *res); /** * kstrtoul - convert a string to an unsigned long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign, but not a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. */ if (sizeof(unsigned long) == sizeof(unsigned long long) && __alignof__(unsigned long) == __alignof__(unsigned long long)) return kstrtoull(s, base, (unsigned long long *)res); else return _kstrtoul(s, base, res); } /** * kstrtol - convert a string to a long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign or a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(long, long long) = 0. */ if (sizeof(long) == sizeof(long long) && __alignof__(long) == __alignof__(long long)) return kstrtoll(s, base, (long long *)res); else return _kstrtol(s, base, res); } int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); int __must_check kstrtoint(const char *s, unsigned int base, int *res); static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) { return kstrtoull(s, base, res); } static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) { return kstrtoll(s, base, res); } static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) { return kstrtouint(s, base, res); } static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) { return kstrtoint(s, base, res); } int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { return kstrtoull_from_user(s, count, base, res); } static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) { return kstrtoll_from_user(s, count, base, res); } static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) { return kstrtouint_from_user(s, count, base, res); } static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) { return kstrtoint_from_user(s, count, base, res); } /* * Use kstrto<foo> instead. * * NOTE: simple_strto<foo> does not check for the range overflow and, * depending on the input, may give interesting results. * * Use these functions if and only if you cannot use kstrto<foo>, because * the conversion ends on the first non-digit character, which may be far * beyond the supported range. It might be useful to parse the strings like * 10x50 or 12:21 without altering original string or temporary buffer in use. * Keep in mind above caveat. */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); extern int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); /* lib/printf utilities */ extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list); extern __printf(3, 4) int snprintf(char *buf, size_t size, const char *fmt, ...); extern __printf(3, 0) int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); extern __printf(3, 4) int scnprintf(char *buf, size_t size, const char *fmt, ...); extern __printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); extern __printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...); extern __printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); extern __printf(2, 0) const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); extern __scanf(2, 0) int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); extern int init_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); u64 int_pow(u64 base, unsigned int exp); unsigned long int_sqrt(unsigned long); #if BITS_PER_LONG < 64 u32 int_sqrt64(u64 x); #else static inline u32 int_sqrt64(u64 x) { return (u32)int_sqrt(x); } #endif #ifdef CONFIG_SMP extern unsigned int sysctl_oops_all_cpu_backtrace; #else #define sysctl_oops_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ extern void bust_spinlocks(int yes); extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; extern int panic_on_warn; extern unsigned long panic_on_taint; extern bool panic_on_taint_nousertaint; extern int sysctl_panic_on_rcu_stall; extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; /* * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It * holds a CPU number which is executing panic() currently. A value of * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). */ extern atomic_t panic_cpu; #define PANIC_CPU_INVALID -1 /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. */ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) { if (panic_timeout == arch_default_timeout) panic_timeout = timeout; } extern const char *print_tainted(void); enum lockdep_ok { LOCKDEP_STILL_OK, LOCKDEP_NOW_UNRELIABLE }; extern void add_taint(unsigned flag, enum lockdep_ok); extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; extern bool early_boot_irqs_disabled; /* * Values used for system_state. Ordering of the states must not be changed * as code checks for <, <=, >, >= STATE. */ extern enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_RUNNING, SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, SYSTEM_SUSPEND, } system_state; /* This cannot be an enum because some may be used in assembly source. */ #define TAINT_PROPRIETARY_MODULE 0 #define TAINT_FORCED_MODULE 1 #define TAINT_CPU_OUT_OF_SPEC 2 #define TAINT_FORCED_RMMOD 3 #define TAINT_MACHINE_CHECK 4 #define TAINT_BAD_PAGE 5 #define TAINT_USER 6 #define TAINT_DIE 7 #define TAINT_OVERRIDDEN_ACPI_TABLE 8 #define TAINT_WARN 9 #define TAINT_CRAP 10 #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 #define TAINT_UNSIGNED_MODULE 13 #define TAINT_SOFTLOCKUP 14 #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 #define TAINT_FLAGS_COUNT 18 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { char c_true; /* character printed when tainted */ char c_false; /* character printed when not tainted */ bool module; /* also show as a per-module taint flag */ }; extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] static inline char *hex_byte_pack(char *buf, u8 byte) { *buf++ = hex_asc_hi(byte); *buf++ = hex_asc_lo(byte); return buf; } extern const char hex_asc_upper[]; #define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] #define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] static inline char *hex_byte_pack_upper(char *buf, u8 byte) { *buf++ = hex_asc_upper_hi(byte); *buf++ = hex_asc_upper_lo(byte); return buf; } extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); extern char *bin2hex(char *dst, const void *src, size_t count); bool mac_pton(const char *s, u8 *mac); /* * General tracing related utility functions - trace_printk(), * tracing_on/tracing_off and tracing_start()/tracing_stop * * Use tracing_on/tracing_off when you want to quickly turn on or off * tracing. It simply enables or disables the recording of the trace events. * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on * file, which gives a means for the kernel and userspace to interact. * Place a tracing_off() in the kernel where you want tracing to end. * From user space, examine the trace, and then echo 1 > tracing_on * to continue tracing. * * tracing_stop/tracing_start has slightly more overhead. It is used * by things like suspend to ram where disabling the recording of the * trace is not enough, but tracing must actually stop because things * like calling smp_processor_id() may crash the system. * * Most likely, you want to use tracing_on/tracing_off. */ enum ftrace_dump_mode { DUMP_NONE, DUMP_ALL, DUMP_ORIG, }; #ifdef CONFIG_TRACING void tracing_on(void); void tracing_off(void); int tracing_is_on(void); void tracing_snapshot(void); void tracing_snapshot_alloc(void); extern void tracing_start(void); extern void tracing_stop(void); static inline __printf(1, 2) void ____trace_printk_check_format(const char *fmt, ...) { } #define __trace_printk_check_format(fmt, args...) \ do { \ if (0) \ ____trace_printk_check_format(fmt, ##args); \ } while (0) /** * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * * Note: __trace_printk is an internal function for trace_printk() and * the @ip is passed in via the trace_printk() macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various * printk like tracing in the code, a developer can quickly see * where problems are occurring. * * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are * allocated when trace_printk() is used.) * * A little optimization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. * The trace_puts() will suffice. But how can we take advantage of * using trace_puts() when trace_printk() has only one argument? * By stringifying the args and checking the size we can tell * whether or not there are args. __stringify((__VA_ARGS__)) will * turn into "()\0" with a size of 3 when there are no args, anything * else will be bigger. All we need to do is define a string to this, * and then take its size and compare to 3. If it's bigger, use * do_trace_printk() otherwise, optimize it to trace_puts(). Then just * let gcc optimize the rest. */ #define trace_printk(fmt, ...) \ do { \ char _______STR[] = __stringify((__VA_ARGS__)); \ if (sizeof(_______STR) > 3) \ do_trace_printk(fmt, ##__VA_ARGS__); \ else \ trace_puts(fmt); \ } while (0) #define do_trace_printk(fmt, args...) \ do { \ static const char *trace_printk_fmt __used \ __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_printk_check_format(fmt, ##args); \ \ if (__builtin_constant_p(fmt)) \ __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ else \ __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) extern __printf(2, 3) int __trace_bprintk(unsigned long ip, const char *fmt, ...); extern __printf(2, 3) int __trace_printk(unsigned long ip, const char *fmt, ...); /** * trace_puts - write a string into the ftrace buffer * @str: the string to record * * Note: __trace_bputs is an internal function for trace_puts and * the @ip is passed in via the trace_puts macro. * * This is similar to trace_printk() but is made for those really fast * paths that a developer wants the least amount of "Heisenbug" effects, * where the processing of the print format is still too much. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various * printk like tracing in the code, a developer can quickly see * where problems are occurring. * * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_puts scattered around in * your code. (Extra memory is used for special buffers that are * allocated when trace_puts() is used.) * * Returns: 0 if nothing was written, positive # if string was. * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) */ #define trace_puts(str) ({ \ static const char *trace_printk_fmt __used \ __section("__trace_printk_fmt") = \ __builtin_constant_p(str) ? str : NULL; \ \ if (__builtin_constant_p(str)) \ __trace_bputs(_THIS_IP_, trace_printk_fmt); \ else \ __trace_puts(_THIS_IP_, str, strlen(str)); \ }) extern int __trace_bputs(unsigned long ip, const char *str); extern int __trace_puts(unsigned long ip, const char *str, int size); extern void trace_dump_stack(int skip); /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a * constant. Even with the outer if statement. */ #define ftrace_vprintk(fmt, vargs) \ do { \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt __used \ __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ } else \ __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ } while (0) extern __printf(2, 0) int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); extern __printf(2, 0) int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void trace_dump_stack(int skip) { } static inline void tracing_on(void) { } static inline void tracing_off(void) { } static inline int tracing_is_on(void) { return 0; } static inline void tracing_snapshot(void) { } static inline void tracing_snapshot_alloc(void) { } static inline __printf(1, 2) int trace_printk(const char *fmt, ...) { return 0; } static __printf(1, 0) inline int ftrace_vprintk(const char *fmt, va_list ap) { return 0; } static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* This counts to 12. Any more, it will return 13th argument. */ #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #define __CONCAT(a, b) a ## b #define CONCATENATE(a, b) __CONCAT(a, b) /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. * */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ !__same_type(*(ptr), void), \ "pointer type mismatch in container_of()"); \ ((type *)(__mptr - offsetof(type, member))); }) /** * container_of_safe - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. * * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. */ #define container_of_safe(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ !__same_type(*(ptr), void), \ "pointer type mismatch in container_of()"); \ IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) : \ ((type *)(__mptr - offsetof(type, member))); }) /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ #define VERIFY_OCTAL_PERMISSIONS(perms) \ (BUILD_BUG_ON_ZERO((perms) < 0) + \ BUILD_BUG_ON_ZERO((perms) > 0777) + \ /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ /* USER_WRITABLE >= GROUP_WRITABLE */ \ BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ /* OTHER_WRITABLE? Generally considered a bad idea. */ \ BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _IP6_FIB_H #define _IP6_FIB_H #include <linux/ipv6_route.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> #include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 #else #define FIB6_TABLE_HASHSZ 1 #endif #define RT6_DEBUG 2 #if RT6_DEBUG >= 3 #define RT6_TRACE(x...) pr_debug(x) #else #define RT6_TRACE(x...) do { ; } while (0) #endif struct rt6_info; struct fib6_info; struct fib6_config { u32 fc_table; u32 fc_metric; int fc_dst_len; int fc_src_len; int fc_ifindex; u32 fc_flags; u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, fc_ignore_dev_down:1, __unused : 14; u32 fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; struct in6_addr fc_prefsrc; struct in6_addr fc_gateway; unsigned long fc_expires; struct nlattr *fc_mx; int fc_mx_len; int fc_mp_len; struct nlattr *fc_mp; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; bool fc_is_fdb; }; struct fib6_node { struct fib6_node __rcu *parent; struct fib6_node __rcu *left; struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; struct fib6_gc_args { int timeout; int more; }; #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL static inline bool fib6_routes_require_src(const struct net *net) { return false; } static inline void fib6_routes_require_src_inc(struct net *net) {} static inline void fib6_routes_require_src_dec(struct net *net) {} #else static inline bool fib6_routes_require_src(const struct net *net) { return net->ipv6.fib6_routes_require_src > 0; } static inline void fib6_routes_require_src_inc(struct net *net) { net->ipv6.fib6_routes_require_src++; } static inline void fib6_routes_require_src_dec(struct net *net) { net->ipv6.fib6_routes_require_src--; } #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif /* * routing information * */ struct rt6key { struct in6_addr addr; int plen; }; struct fib6_table; struct rt6_exception_bucket { struct hlist_head chain; int depth; }; struct rt6_exception { struct hlist_node hlist; struct rt6_info *rt6i; unsigned long stamp; struct rcu_head rcu; }; #define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 struct fib6_nh { struct fib_nh_common nh_common; #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { struct fib6_table *fib6_table; struct fib6_info __rcu *fib6_next; struct fib6_node __rcu *fib6_node; /* Multipath routes: * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ union { struct list_head fib6_siblings; struct list_head nh_list; }; unsigned int fib6_nsiblings; refcount_t fib6_ref; unsigned long expires; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] struct rt6key fib6_dst; u32 fib6_flags; struct rt6key fib6_src; struct rt6key fib6_prefsrc; u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, offload:1, trap:1, unused:2; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; }; struct rt6_info { struct dst_entry dst; struct fib6_info __rcu *from; int sernum; struct rt6key rt6i_dst; struct rt6key rt6i_src; struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; }; struct fib6_result { struct fib6_nh *nh; struct fib6_info *f6i; u32 fib6_flags; u8 fib6_type; struct rt6_info *rt6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) { return rt->fib6_src.plen > 0; } static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { f6i->expires = expires; f6i->fib6_flags |= RTF_EXPIRES; } static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) return time_after(jiffies, f6i->expires); return false; } /* Function to safely get fn->sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely * Return false if not */ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, u32 *cookie) { struct fib6_node *fn; bool status = false; fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = fn->fn_sernum; /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ smp_rmb(); status = true; } return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { struct fib6_info *from; u32 cookie = 0; if (rt->sernum) return rt->sernum; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) fib6_get_cookie_safe(from, &cookie); rcu_read_unlock(); return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. * We rely on dst being first structure in struct rt6_info */ BUILD_BUG_ON(offsetof(struct rt6_info, dst) != 0); dst_release(&rt->dst); } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload, bool trap) { f6i->offload = offload; f6i->trap = trap; } enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif FWS_L, FWS_R, FWS_C, FWS_U }; struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; unsigned int skip_in_node; int (*func)(struct fib6_walker *); void *args; }; struct rt6_statistics { __u32 fib_nodes; /* all fib6 nodes */ __u32 fib_route_nodes; /* intermediate nodes */ __u32 fib_rt_entries; /* rt entries in fib table */ __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ /* The following stats are not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 #define RTN_ROOT 0x0002 /* tree root node */ #define RTN_RTINFO 0x0004 /* node with valid routing info */ /* * priority levels (or metrics) * */ struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX #define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; unsigned int nsiblings; }; /* * exported functions */ struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); /* called with rcu lock held; can return error pointer * caller needs to select path */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int strict); void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); void fib6_clean_all_skip_notify(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); static inline void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) { const struct fib6_info *from; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) { *addr = from->fib6_prefsrc.addr; } else { struct in6_addr in6_zero = {}; *addr = in6_zero; } rcu_read_unlock(); } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack); int call_fib6_multipath_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); void fib6_gc_cleanup(void); int fib6_init(void); struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; loff_t skip; struct fib6_table *tbl; int sernum; }; extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); }; #endif INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup, struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return INDIRECT_CALL_4(lookup, ip6_pol_route_output, ip6_pol_route_input, ip6_pol_route_lookup, __ip6_route_redirect, net, table, fl6, skb, flags); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES static inline bool fib6_has_custom_rules(const struct net *net) { return net->ipv6.fib6_has_custom_rules; } int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv6.fib6_rules_require_fldissect) return false; skb_flow_dissect_flow_keys(skb, flkeys, flag); fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; fl6->flowi6_proto = flkeys->basic.ip_proto; return true; } #else static inline bool fib6_has_custom_rules(const struct net *net) { return false; } static inline int fib6_rules_init(void) { return 0; } static inline void fib6_rules_cleanup(void) { return ; } static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib6_rules_seq_read(struct net *net) { return 0; } static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { return false; } #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H #include <linux/netfilter/nf_conntrack_zones_common.h> #include <net/netfilter/nf_conntrack.h> static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_ZONES return &ct->zone; #else return &nf_ct_zone_dflt; #endif } static inline const struct nf_conntrack_zone * nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) { zone->id = id; zone->flags = flags; zone->dir = dir; return zone; } static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { #ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; if (tmpl->zone.flags & NF_CT_FLAG_MARK) return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); #endif return nf_ct_zone(tmpl); } static inline void nf_ct_zone_add(struct nf_conn *ct, const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES ct->zone = *zone; #endif } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { return zone->dir & (1 << dir); } static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { #ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_matches_dir(zone, dir) ? zone->id : NF_CT_DEFAULT_ZONE_ID; #else return NF_CT_DEFAULT_ZONE_ID; #endif } static inline bool nf_ct_zone_equal(const struct nf_conn *a, const struct nf_conntrack_zone *b, enum ip_conntrack_dir dir) { #ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_id(nf_ct_zone(a), dir) == nf_ct_zone_id(b, dir); #else return true; #endif } static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, const struct nf_conntrack_zone *b) { #ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone(a)->id == b->id; #else return true; #endif } #endif /* _NF_CONNTRACK_ZONES_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 /* SPDX-License-Identifier: GPL-2.0 */ /* * Filesystem access notification for Linux * * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #ifndef __LINUX_FSNOTIFY_BACKEND_H #define __LINUX_FSNOTIFY_BACKEND_H #ifdef __KERNEL__ #include <linux/idr.h> /* inotify uses this */ #include <linux/fs.h> /* struct inode */ #include <linux/list.h> #include <linux/path.h> /* struct path */ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/user_namespace.h> #include <linux/refcount.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily * convert between them. dnotify only needs conversion at watch creation * so no perf loss there. fanotify isn't defined yet, so it can use the * wholes if it needs more events. */ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ #define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ #define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ #define FS_CREATE 0x00000100 /* Subfile was created */ #define FS_DELETE 0x00000200 /* Subfile was deleted */ #define FS_DELETE_SELF 0x00000400 /* Self was deleted */ #define FS_MOVE_SELF 0x00000800 /* Self was moved */ #define FS_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. * Set on inode/sb/mount marks that care about parent/name info. */ #define FS_EVENT_ON_CHILD 0x08000000 #define FS_DN_RENAME 0x10000000 /* file renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) /* * This is a list of all events that may get sent to a parent that is watching * with flag FS_EVENT_ON_CHILD based on fs event on a child of that directory. */ #define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ FS_OPEN | FS_OPEN_EXEC) /* * This is a list of all events that may get sent with the parent inode as the * @to_tell argument of fsnotify(). * It may include events that can be sent to an inode/sb/mount mark, but cannot * be sent to a parent watching children. */ #define FS_EVENTS_POSS_TO_PARENT (FS_EVENTS_POSS_ON_CHILD) /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; struct fsnotify_iter_info; struct mem_cgroup; /* * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * * handle_event - main call for a group to handle an fs event * @group: group to notify * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @cookie: inotify rename cookie * @iter_info: array of marks from this group that are interested in the event * * handle_inode_event - simple variant of handle_event() for groups that only * have inode marks and don't have ignore mask * @mark: mark to notify * @mask: event type and flags * @inode: inode that event happened on * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. * @file_name: optional file name associated with event * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group * MUST be holding a reference on each mark and that reference must be * dropped in this function. inotify uses this function to send * userspace messages that marks have been removed. */ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the * listener this structure is where you need to be adding fields. */ struct fsnotify_event { struct list_head list; unsigned long objectid; /* identifier for queue merges */ }; /* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. * refcnt on a group is up to the implementor and at any moment if it goes 0 * everything will be cleaned up. */ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ /* * How the refcnt is used is up to each group. When the refcnt hits 0 * fsnotify will clean up all of the resources associated with this group. * As an example, the dnotify group will always have a refcnt=1 and that * will never change. Inotify, on the other hand, has a group per * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. */ refcount_t refcnt; /* things with interest in this group */ /* needed to send notification to userspace */ spinlock_t notification_lock; /* protect the notification_list */ struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ /* * Valid fsnotify group priorities. Events are send in order from highest * priority to lowest priority. We default to the lowest priority. */ #define FS_PRIO_0 0 /* normal notifiers, no permissions */ #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ atomic_t num_marks; /* 1 for each mark and 1 for not being * past the point of no return when freeing * a group */ atomic_t user_waits; /* Number of tasks waiting for user * response */ struct list_head marks_list; /* all inode marks for this group */ struct fasync_struct *fsn_fa; /* async notification */ struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ /* groups can define private fields here or use the void *private */ union { void *private; #ifdef CONFIG_INOTIFY_USER struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; struct ucounts *ucounts; } inotify_data; #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ unsigned int max_marks; struct user_struct *user; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; }; /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); default: return NULL; } } static inline const struct path *fsnotify_data_path(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; default: return NULL; } } enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) #define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) static inline bool fsnotify_valid_obj_type(unsigned int type) { return (type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT]; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( struct fsnotify_iter_info *iter_info, int type) { return (iter_info->report_mask & (1U << type)); } static inline void fsnotify_iter_set_report_type( struct fsnotify_iter_info *iter_info, int type) { iter_info->report_mask |= (1U << type); } static inline void fsnotify_iter_set_report_type_mark( struct fsnotify_iter_info *iter_info, int type, struct fsnotify_mark *mark) { iter_info->marks[type] = mark; iter_info->report_mask |= (1U << type); } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \ iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ } FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_obj_type(type) \ for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached * to. fsnotify_connp_t * is how we refer from connector back to object. */ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 unsigned short flags; /* flags [lock] */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; struct hlist_head list; }; /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events * of a type matching mask or only interested in those events. * * These are flushed when an inode is evicted from core and may be flushed * when the inode is modified (as seen by fsnotify_access). Some fsnotify * users (such as dnotify) will flush these when the open fd is closed and not * at inode eviction or modification. * * Text in brackets is showing the lock(s) protecting modifications of a * particular entry. obj_lock means either inode->i_lock or * mnt->mnt_root->d_lock depending on the mark type. */ struct fsnotify_mark { /* Mask this mark is for [mark->lock, group->mark_mutex] */ __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; /* List of marks by group->marks_list. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 #define FSNOTIFY_MARK_FLAG_ALIVE 0x02 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ }; #ifdef CONFIG_FSNOTIFY /* called from the vfs helpers */ /* main fsnotify call to send events */ extern int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie); extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { /* FS_EVENT_ON_CHILD is set on marks that want parent/name info */ if (!(mask & FS_EVENT_ON_CHILD)) return 0; /* * This object might be watched by a mark that cares about parent/name * info, does it care about the specific set of events that can be * reported with parent/name info? */ return mask & FS_EVENTS_POSS_TO_PARENT; } static inline int fsnotify_inode_watches_children(struct inode *inode) { /* FS_EVENT_ON_CHILD is set if the inode may care */ if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) return 0; /* this inode might care about child events, does it care about the * specific set of events that can happen on a child? */ return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; } /* * Update the dentry with a flag indicating the interest of its parent to receive * filesystem events when those events happens to this dentry->d_inode. */ static inline void fsnotify_update_flags(struct dentry *dentry) { assert_spin_locked(&dentry->d_lock); /* * Serialisation of setting PARENT_WATCHED on the dentries is provided * by d_lock. If inotify_inode_watched changes after we have taken * d_lock, the following __fsnotify_update_child_dentry_flags call will * find our entry, so it will spin until we complete here, and update * us with the new state. */ if (fsnotify_inode_watches_children(dentry->d_parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; } /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* group destruction begins, stop queuing new events */ extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ extern int fsnotify_fasync(int fd, struct file *file, int on); /* Free event from memory */ extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { fsnotify_add_event(group, group->overflow_event, NULL); } /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); /* Remove event queued in the notification list */ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, struct fsnotify_event *event); /* functions used to manipulate the marks attached to inodes */ /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); /* Get cached fsid of filesystem containing object */ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, int allow_dups, __kernel_fsid_t *fsid); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* detach mark from inode / mount list, group list, drop inode reference */ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); /* run all the marks in a group, and clear all of the marks attached to given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event, unsigned long objectid) { INIT_LIST_HEAD(&event->list); event->objectid = objectid; } #else static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) { return 0; } static inline int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { return 0; } static inline void __fsnotify_inode_delete(struct inode *inode) {} static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} static inline void fsnotify_sb_delete(struct super_block *sb) {} static inline void fsnotify_update_flags(struct dentry *dentry) {} static inline u32 fsnotify_get_cookie(void) { return 0; } static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ #endif /* __LINUX_FSNOTIFY_BACKEND_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_CPUMASK_H #define __LINUX_CPUMASK_H /* * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/atomic.h> #include <linux/bug.h> /* Don't assign or return these: may not be this big! */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** * cpumask_bits - get the bits in a cpumask * @maskp: the struct cpumask * * * You should only assume nr_cpu_ids bits of this mask are valid. This is * a macro so it's const-correct. */ #define cpumask_bits(maskp) ((maskp)->bits) /** * cpumask_pr_args - printf args to output a cpumask * @maskp: cpumask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 #define nr_cpu_ids 1U #else extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif /* * The following particular system cpumasks and operations manage * possible, present, active and online cpus. * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * * The cpu_possible_mask is fixed at boot time, as the set of CPU id's * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * * If HOTPLUG is enabled, then cpu_possible_mask is forced to have * all NR_CPUS bits set, otherwise it is just the set of CPUs that * ACPI reports present at boot. * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. * * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. */ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) extern atomic_t __num_online_cpus; #if NR_CPUS > 1 /** * num_online_cpus() - Read the number of online CPUs * * Despite the fact that __num_online_cpus is of type atomic_t, this * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. */ static inline unsigned int num_online_cpus(void) { return atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) #define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) #define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) #define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) #else #define num_online_cpus() 1U #define num_possible_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) #define cpu_active(cpu) ((cpu) == 0) #endif extern cpumask_t cpus_booted_once_mask; static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ } /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; } #if NR_CPUS == 1 /* Uniprocessor. Assume all masks are "1". */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { return 0; } static inline unsigned int cpumask_last(const struct cpumask *srcp) { return 0; } /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { return n+1; } static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { return n+1; } static inline unsigned int cpumask_next_and(int n, const struct cpumask *srcp, const struct cpumask *andp) { return n+1; } static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */ return (wrap && n == 0); } /* cpu must be a valid cpu, ie 0, so there's no other choice. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { return 1; } static inline unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } static inline int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { return cpumask_next_and(-1, src1p, src2p); } #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_wrap(cpu, mask, start) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) #define for_each_cpu_and(cpu, mask1, mask2) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2) #else /** * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * * Returns >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * * Returns >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); } unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask * @n: the cpu prior to the place to search (ie. return will be > @n) * @srcp: the cpumask pointer * * Returns >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); /** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) /** * for_each_cpu_not - iterate over every cpu in a complemented mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_not(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask poiter * @start: the start location * * The implementation does not assume any bit in @mask is set (including @start). * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_wrap(cpu, mask, start) \ for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ (cpu) < nr_cpumask_bits; \ (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator * @mask1: the first cpumask pointer * @mask2: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; * cpumask_and(&tmp, &mask1, &mask2); * for_each_cpu(cpu, &tmp) * ... * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ (cpu) < nr_cpu_ids;) #endif /* SMP */ #define CPU_BITS_NONE \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } #define CPU_BITS_CPU0 \ { \ [0] = 1UL \ } /** * cpumask_set_cpu - set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in @cpumask, else returns 0 */ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } /** * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 * * test_and_set_bit wrapper for cpumasks. */ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_test_and_clear_cpu - atomically test and clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 * * test_and_clear_bit wrapper for cpumasks. */ static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_setall(struct cpumask *dstp) { bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_and - *dstp = *src1p & *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * If *@dstp is empty, returns 0, else returns 1 */ static inline int cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_or - *dstp = *src1p | *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_xor - *dstp = *src1p ^ *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_andnot - *dstp = *src1p & ~*src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * If *@dstp is empty, returns 0, else returns 1 */ static inline int cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_complement - *dstp = ~*srcp * @dstp: the cpumask result * @srcp: the input to invert */ static inline void cpumask_complement(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_or_equal - *src1p | *src2p == *src3p * @src1p: the first input * @src2p: the second input * @src3p: the third input */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), nr_cpumask_bits); } /** * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_subset - (*src1p & ~*src2p) == 0 * @src1p: the first input * @src2p: the second input * * Returns 1 if *@src1p is a subset of *@src2p, else returns 0 */ static inline int cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. */ static inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. */ static inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); } /** * cpumask_shift_left - *dstp = *srcp << n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); } /** * cpumask_copy - *dstp = *srcp * @dstp: the result * @srcp: the input cpumask */ static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * * Returns >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) /** * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 * @src1p: the first input * @src2p: the second input * * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ #define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask * @mask2: the second input cpumask * * Returns >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) /** * cpumask_of - the cpumask containing just a given cpu * @cpu: the cpu (<= nr_cpu_ids) */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parselist_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parse - extract a cpumask from a string * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpulist_parse - extract a cpumask from a user string of ranges * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Returns -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_size - size to allocate for a 'struct cpumask' in bytes */ static inline unsigned int cpumask_size(void) { return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); } /* * cpumask_var_t: struct cpumask for stack usage. * * Oh, the wicked games we play! In order to make kernel coding a * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * * ie. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * ... use 'tmpmask' like a normal struct cpumask * ... * * free_cpumask_var(tmpmask); * * * However, one notable exception is there. alloc_cpumask_var() allocates * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. * * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * var = *tmpmask; * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. * * Note that there is another evil here: If you define a cpumask_var_t * as a percpu variable then the way to obtain the address of the cpumask * structure differently influences what this_cpu_* operation needs to be * used. Please use this_cpu_cpumask_var_t in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. * * Please also note that __cpumask_var_read_mostly can be used to declare * a cpumask_var_t variable itself (not its content) as read mostly. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) #define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); static inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } #else typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } static inline void free_cpumask_var(cpumask_var_t mask) { } static inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } static inline bool cpumask_available(cpumask_var_t mask) { return true; } #endif /* CONFIG_CPUMASK_OFFSTACK */ /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define cpu_all_mask to_cpumask(cpu_all_bits) /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); static inline void reset_cpu_possible_mask(void) { bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); } static inline void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) cpumask_set_cpu(cpu, &__cpu_possible_mask); else cpumask_clear_cpu(cpu, &__cpu_possible_mask); } static inline void set_cpu_present(unsigned int cpu, bool present) { if (present) cpumask_set_cpu(cpu, &__cpu_present_mask); else cpumask_clear_cpu(cpu, &__cpu_present_mask); } void set_cpu_online(unsigned int cpu, bool online); static inline void set_cpu_active(unsigned int cpu, bool active) { if (active) cpumask_set_cpu(cpu, &__cpu_active_mask); else cpumask_clear_cpu(cpu, &__cpu_active_mask); } /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and * static cpumasks must be used (eg. very early boot), yet we don't * expose the definition of 'struct cpumask'. * * This does the conversion, and can be used as a constant initializer. */ #define to_cpumask(bitmap) \ ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) static inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } /* * Special-case data structure for "single bit set only" constant CPU masks. * * We pre-generate all the 64 (or 32) possible bit positions, with enough * padding to the left and the right, and return the constant pointer * appropriately offset. */ extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; static inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return to_cpumask(p); } #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG #define CPU_BITS_ALL \ { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #endif /* NR_CPUS > BITS_PER_LONG */ /** * cpumap_print_to_pagebuf - copies the cpumask into the buffer either * as comma-separated list of cpus or hex values of cpumask * @list: indicates whether the cpumap must be list * @mask: the cpumask to copy * @buf: the buffer to copy into * * Returns the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), nr_cpu_ids); } #if NR_CPUS <= BITS_PER_LONG #define CPU_MASK_ALL \ (cpumask_t) { { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #else #define CPU_MASK_ALL \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #endif /* NR_CPUS > BITS_PER_LONG */ #define CPU_MASK_NONE \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } #define CPU_MASK_CPU0 \ (cpumask_t) { { \ [0] = 1UL \ } } #endif /* __LINUX_CPUMASK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIMEKEEPING_H #define _LINUX_TIMEKEEPING_H #include <linux/errno.h> /* Included from linux/ktime.h */ void timekeeping_init(void); extern int timekeeping_suspended; /* Architecture timer tick functions: */ extern void update_process_times(int user); extern void xtime_update(unsigned long ticks); /* * Get and set timeofday */ extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* * ktime_get() family: read the current time in a multitude of ways, * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * * To get the time in a different format, use the ones wit * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. */ /* * timespec64 based interfaces */ extern void ktime_get_raw_ts64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); void getboottime64(struct timespec64 *ts); /* * time64_t base interfaces */ extern time64_t ktime_get_seconds(void); extern time64_t __ktime_get_real_seconds(void); extern time64_t ktime_get_real_seconds(void); /* * ktime_t based interfaces */ enum tk_offsets { TK_OFFS_REAL, TK_OFFS_BOOT, TK_OFFS_TAI, TK_OFFS_MAX, }; extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { return ktime_get_with_offset(TK_OFFS_REAL); } static inline ktime_t ktime_get_coarse_real(void) { return ktime_get_coarse_with_offset(TK_OFFS_REAL); } /** * ktime_get_boottime - Returns monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. */ static inline ktime_t ktime_get_boottime(void) { return ktime_get_with_offset(TK_OFFS_BOOT); } static inline ktime_t ktime_get_coarse_boottime(void) { return ktime_get_coarse_with_offset(TK_OFFS_BOOT); } /** * ktime_get_clocktai - Returns the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { return ktime_get_with_offset(TK_OFFS_TAI); } static inline ktime_t ktime_get_coarse_clocktai(void) { return ktime_get_coarse_with_offset(TK_OFFS_TAI); } static inline ktime_t ktime_get_coarse(void) { struct timespec64 ts; ktime_get_coarse_ts64(&ts); return timespec64_to_ktime(ts); } static inline u64 ktime_get_coarse_ns(void) { return ktime_to_ns(ktime_get_coarse()); } static inline u64 ktime_get_coarse_real_ns(void) { return ktime_to_ns(ktime_get_coarse_real()); } static inline u64 ktime_get_coarse_boottime_ns(void) { return ktime_to_ns(ktime_get_coarse_boottime()); } static inline u64 ktime_get_coarse_clocktai_ns(void) { return ktime_to_ns(ktime_get_coarse_clocktai()); } /** * ktime_mono_to_real - Convert monotonic time to clock realtime */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); } extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); /* * timespec64/time64_t interfaces utilizing the ktime based ones * for API completeness, these could be implemented more efficiently * if needed. */ static inline void ktime_get_boottime_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_boottime()); } static inline void ktime_get_coarse_boottime_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_coarse_boottime()); } static inline time64_t ktime_get_boottime_seconds(void) { return ktime_divns(ktime_get_coarse_boottime(), NSEC_PER_SEC); } static inline void ktime_get_clocktai_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); } static inline void ktime_get_coarse_clocktai_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_coarse_clocktai()); } static inline time64_t ktime_get_clocktai_seconds(void) { return ktime_divns(ktime_get_coarse_clocktai(), NSEC_PER_SEC); } /* * RTC specific */ extern bool timekeeping_rtc_skipsuspend(void); extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /* * struct ktime_timestanps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp */ struct ktime_timestamps { u64 mono; u64 boot; u64 real; }; /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { u64 cycles; ktime_t real; ktime_t raw; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; }; /** * struct system_device_crosststamp - system/device cross-timestamp * (synchronized capture) * @device: Device time * @sys_realtime: Realtime simultaneous with device time * @sys_monoraw: Monotonic raw simultaneous with device time */ struct system_device_crosststamp { ktime_t device; ktime_t sys_realtime; ktime_t sys_monoraw; }; /** * struct system_counterval_t - system counter value with the pointer to the * corresponding clocksource * @cycles: System counter value * @cs: Clocksource corresponding to system counter value. Used by * timekeeping code to verify comparibility of two cycle values */ struct system_counterval_t { u64 cycles; struct clocksource *cs; }; /* * Get cross timestamp between system clock and device clock */ extern int get_device_system_crosststamp( int (*get_time_fn)(ktime_t *device_time, struct system_counterval_t *system_counterval, void *ctx), void *ctx, struct system_time_snapshot *history, struct system_device_crosststamp *xtstamp); /* * Simultaneously snapshot realtime and monotonic raw clocks */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); /* NMI safe mono/boot/realtime timestamps */ extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); /* * Persistent clock related interfaces */ extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, struct timespec64 *boot_offset); extern int update_persistent_clock64(struct timespec64 now); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_GENERIC_GETORDER_H #define __ASM_GENERIC_GETORDER_H #ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/log2.h> /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order * * Determine the allocation order of a particular sized block of memory. This * is on a logarithmic scale, where: * * 0 -> 2^0 * PAGE_SIZE and below * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 * ... * * The order returned is used to find the smallest allocation granule required * to hold an object of the specified size. * * The result is undefined if the size is 0. */ static inline __attribute_const__ int get_order(unsigned long size) { if (__builtin_constant_p(size)) { if (!size) return BITS_PER_LONG - PAGE_SHIFT; if (size < (1UL << PAGE_SHIFT)) return 0; return ilog2((size) - 1) - PAGE_SHIFT + 1; } size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 return fls(size); #else return fls64(size); #endif } #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_GETORDER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> struct resource; struct device; /** * struct vmem_altmap - pre-allocated storage for vmemmap_populate * @base_pfn: base of the entire dev_pagemap mapping * @reserve: pages mapped, but reserved for driver use (relative to @base) * @free: free pages set aside in the mapping for memmap storage * @align: pages reserved to meet allocation alignments * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { const unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; unsigned long alloc; }; /* * Specialize ZONE_DEVICE memory into multiple types each having differents * usage. * * MEMORY_DEVICE_PRIVATE: * Device memory that is not directly addressable by the CPU: CPU can neither * read nor write private memory. In this case, we do still have struct pages * backing the device memory. Doing so simplifies the implementation, but it is * important to remember that there are certain points at which the struct page * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.rst. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a * wakeup event whenever a page is unpinned and becomes idle. This * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. This is for example used by DAX devices * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions. */ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; struct dev_pagemap_ops { /* * Called once the page refcount reaches 1. (ZONE_DEVICE pages never * reach 0 refcount unless there is a refcount bug. This allows the * device driver to implement its own memory management.) */ void (*page_free)(struct page *page); /* * Transition the refcount in struct dev_pagemap to the dead state. */ void (*kill)(struct dev_pagemap *pgmap); /* * Wait for refcount in struct dev_pagemap to be idle and reap it. */ void (*cleanup)(struct dev_pagemap *pgmap); /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); }; #define PGMAP_ALTMAP_VALID (1 << 0) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed. * @nr_range: number of ranges to be mapped * @range: range to be mapped when nr_range == 1 * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; void *owner; int nr_range; union { struct range range; struct range ranges[0]; }; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) return &pgmap->altmap; return NULL; } #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without * ZONE_DEVICE support enabled, this requires callers to fall * back to plain devm_memremap() based on config */ WARN_ON_ONCE(1); return ERR_PTR(-ENXIO); } static inline void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) { } static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { return NULL; } static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) { return false; } static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { return 0; } static inline void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) { } /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) percpu_ref_put(pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM x86_fpu #if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FPU_H #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(x86_fpu, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu), TP_STRUCT__entry( __field(struct fpu *, fpu) __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) ); DEFINE_EVENT(x86_fpu, x86_fpu_before_save, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH asm/trace/ #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE fpu #endif /* _TRACE_FPU_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BITMAP_H #define __LINUX_BITMAP_H #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/bitops.h> #include <linux/string.h> #include <linux/kernel.h> /* * bitmaps provide bit arrays that consume one or more unsigned * longs. The bitmap interface and available operations are listed * here, in bitmap.h * * Function implementations generic to all architectures are in * lib/bitmap.c. Functions implementations that are architecture * specific are in various include/asm-<arch>/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. */ /** * DOC: bitmap overview * * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * * The generated code is more efficient when nbits is known at * compile-time and at most BITS_PER_LONG. * * :: * * bitmap_zero(dst, nbits) *dst = 0UL * bitmap_fill(dst, nbits) *dst = ~0UL * bitmap_copy(dst, src, nbits) *dst = *src * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) * bitmap_complement(dst, src, nbits) *dst = ~(*src) * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region * bitmap_next_set_region(map, &start, &end, nbits) Find next set region * bitmap_for_each_clear_region(map, rs, re, start, end) * Iterate over all clear regions * bitmap_for_each_set_region(map, rs, re, start, end) * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long * boundary will be zeroed or filled as well. Consider to use * bitmap_clear() or bitmap_set() to make explicit zeroing or filling * respectively. */ /** * DOC: bitmap bitops * * Also the following operations in asm/bitops.h apply to bitmaps.:: * * set_bit(bit, addr) *addr |= bit * clear_bit(bit, addr) *addr &= ~bit * change_bit(bit, addr) *addr ^= bit * test_bit(bit, addr) Is bit set in *addr? * test_and_set_bit(bit, addr) Set bit and return old value * test_and_clear_bit(bit, addr) Clear bit and return old value * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr * find_next_zero_bit(addr, nbits, bit) * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit * find_next_and_bit(addr1, addr2, nbits, bit) * Same as find_next_bit, but in * (*addr1 & *addr2) * */ /** * DOC: declare bitmap * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used * to declare an array named 'name' of just enough unsigned longs to * contain all bit positions from 0 to 'bits' - 1. */ /* * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); extern void bitmap_free(const unsigned long *bitmap); /* * lib/bitmap.c provides these functions: */ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern bool __pure __bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void __bitmap_set(unsigned long *map, unsigned int start, int len); extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask, unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area * * The @align_mask should be one less than a power of 2; the effect is that * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ static inline unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); } extern int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); extern int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else #define bitmap_copy_le bitmap_copy #endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) /* * The static inlines below do not handle constant nbits==0 correctly, * so make such users (should any ever turn up) call the out-of-line * versions. */ #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memcpy(dst, src, len); } /* * Copy bitmap and clear tail bits in last word. */ static inline void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } /* * On 32-bit systems bitmaps are represented as u32 arrays internally, and * therefore conversion is not needed when copying data from/to arrays of u32. */ #if BITS_PER_LONG == 64 extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ bitmap_copy_clear_tail((unsigned long *) (bitmap), \ (const unsigned long *) (buf), (nbits)) #define bitmap_to_arr32(buf, bitmap, nbits) \ bitmap_copy_clear_tail((unsigned long *) (buf), \ (const unsigned long *) (bitmap), (nbits)) #endif static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); } static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } #ifdef __LITTLE_ENDIAN #define BITMAP_MEM_ALIGNMENT 8 #else #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) return !memcmp(src1, src2, nbits / 8); return __bitmap_equal(src1, src2, nbits); } /** * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third * @src1: Pointer to bitmap 1 * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1 * @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2 * @nbits: number of bits in each of these bitmaps * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ static inline bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } static inline int bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); } static inline int bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); } static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); return find_first_bit(src, nbits) == nbits; } static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); return find_first_zero_bit(src, nbits) == nbits; } static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, shift, nbits); } static inline void bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); else __bitmap_replace(dst, old, new, mask, nbits); } static inline void bitmap_next_clear_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_zero_bit(bitmap, end, *rs); *re = find_next_bit(bitmap, end, *rs + 1); } static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); } /* * Bitmap region iterators. Iterates over the bitmap between [@start, @end). * @rs and @re should be integer variables and will be set to start and end * index of the current clear or set region. */ #define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) #define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_set_region((bitmap), &(rs), &(re), (end))) /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit * integers in 32-bit environment, and 64-bit integers in 64-bit one. * * There are four combinations of endianness and length of the word in linux * ABIs: LE64, BE64, LE32 and BE32. * * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in * bitmaps and therefore don't require any special handling. * * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the * other hand is represented as an array of 32-bit words and the position of * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that * word. For example, bit #42 is located at 10th position of 2nd word. * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit * values in memory as it usually does. But for BE we need to swap hi and lo * words manually. * * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps * hi and lo words, as is expected by bitmap. */ #if __BITS_PER_LONG == 64 #define BITMAP_FROM_U64(n) (n) #else #define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ ((unsigned long) ((u64)(n) >> 32)) #endif /** * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap * * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` * to read u64 mask, we will get the wrong word. * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) { dst[0] = mask & ULONG_MAX; if (sizeof(mask) > sizeof(unsigned long)) dst[1] = mask >> 32; } /** * bitmap_get_value8 - get an 8-bit value within a memory region * @map: address to the bitmap memory region * @start: bit offset of the 8-bit value; must be a multiple of 8 * * Returns the 8-bit value located at the @start bit offset within the @src * memory region. */ static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; return (map[index] >> offset) & 0xFF; } /** * bitmap_set_value8 - set an 8-bit value within a memory region * @map: address to the bitmap memory region * @value: the 8-bit value; values wider than 8 bits may clobber bitmap * @start: bit offset of the 8-bit value; must be a multiple of 8 */ static inline void bitmap_set_value8(unsigned long *map, unsigned long value, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; map[index] &= ~(0xFFUL << offset); map[index] |= value << offset; } #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 /* SPDX-License-Identifier: GPL-2.0 */ /* * descriptor table internals; you almost certainly want file.h instead. */ #ifndef __LINUX_FDTABLE_H #define __LINUX_FDTABLE_H #include <linux/posix_types.h> #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/nospec.h> #include <linux/types.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/atomic.h> /* * The default fd array needs to be at least BITS_PER_LONG, * as this is the granularity returned by copy_fdset(). */ #define NR_OPEN_DEFAULT BITS_PER_LONG #define NR_OPEN_MAX ~0U struct fdtable { unsigned int max_fds; struct file __rcu **fd; /* current fd array */ unsigned long *close_on_exec; unsigned long *open_fds; unsigned long *full_fds_bits; struct rcu_head rcu; }; static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->close_on_exec); } static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->open_fds); } /* * Open file table structure */ struct files_struct { /* * read mostly part */ atomic_t count; bool resize_in_progress; wait_queue_head_t resize_wait; struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP */ spinlock_t file_lock ____cacheline_aligned_in_smp; unsigned int next_fd; unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; struct file_operations; struct vfsmount; struct dentry; #define rcu_dereference_check_fdtable(files, fdtfd) \ rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) #define files_fdtable(files) \ rcu_dereference_check_fdtable((files), (files)->fdt) /* * The caller must ensure that fd table isn't shared or hold rcu or file lock */ static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); if (fd < fdt->max_fds) { fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); } return NULL; } static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { RCU_LOCKDEP_WARN(!rcu_read_lock_held() && !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); return __fcheck_files(files, fd); } /* * Check whether the specified fd has an open file. */ #define fcheck(fd) fcheck_files(current->files, fd) struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); extern void __fd_install(struct files_struct *files, unsigned int fd, struct file *file); extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern int __close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); extern struct kmem_cache *files_cachep; #endif /* __LINUX_FDTABLE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 /* SPDX-License-Identifier: GPL-2.0 */ /* * ioport.h Definitions of routines for detecting, reserving and * allocating system resources. * * Authors: Linus Torvalds */ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/types.h> #include <linux/bits.h> /* * Resources are tree-like, allowing * nesting etc.. */ struct resource { resource_size_t start; resource_size_t end; const char *name; unsigned long flags; unsigned long desc; struct resource *parent, *sibling, *child; }; /* * IO resources have these defined flags. * * PCI devices expose these flags to userspace in the "resource" sysfs file, * so don't move them. */ #define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ #define IORESOURCE_TYPE_BITS 0x00001f00 /* Resource type */ #define IORESOURCE_IO 0x00000100 /* PCI/ISA I/O ports */ #define IORESOURCE_MEM 0x00000200 #define IORESOURCE_REG 0x00000300 /* Register offsets */ #define IORESOURCE_IRQ 0x00000400 #define IORESOURCE_DMA 0x00000800 #define IORESOURCE_BUS 0x00001000 #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ #define IORESOURCE_READONLY 0x00004000 #define IORESOURCE_CACHEABLE 0x00008000 #define IORESOURCE_RANGELENGTH 0x00010000 #define IORESOURCE_SHADOWABLE 0x00020000 #define IORESOURCE_SIZEALIGN 0x00040000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00080000 /* start field is alignment */ #define IORESOURCE_MEM_64 0x00100000 #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ #define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ #define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ /* IORESOURCE_SYSRAM specific bits. */ #define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ #define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ /* I/O resource extended types */ #define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) #define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) #define IORESOURCE_DMA_8BIT (0<<0) #define IORESOURCE_DMA_8AND16BIT (1<<0) #define IORESOURCE_DMA_16BIT (2<<0) #define IORESOURCE_DMA_MASTER (1<<2) #define IORESOURCE_DMA_BYTE (1<<3) #define IORESOURCE_DMA_WORD (1<<4) #define IORESOURCE_DMA_SPEED_MASK (3<<6) #define IORESOURCE_DMA_COMPATIBLE (0<<6) #define IORESOURCE_DMA_TYPEA (1<<6) #define IORESOURCE_DMA_TYPEB (2<<6) #define IORESOURCE_DMA_TYPEF (3<<6) /* PnP memory I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */ #define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */ #define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */ #define IORESOURCE_MEM_TYPE_MASK (3<<3) #define IORESOURCE_MEM_8BIT (0<<3) #define IORESOURCE_MEM_16BIT (1<<3) #define IORESOURCE_MEM_8AND16BIT (2<<3) #define IORESOURCE_MEM_32BIT (3<<3) #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) #define IORESOURCE_IO_FIXED (1<<1) #define IORESOURCE_IO_SPARSE (1<<2) /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* Use RAM image, not ROM BAR */ /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ #define IORESOURCE_PCI_EA_BEI (1<<5) /* BAR Equivalent Indicator */ /* * I/O Resource Descriptors * * Descriptors are used by walk_iomem_res_desc() and region_intersects() * for searching a specific resource range in the iomem table. Assign * a new descriptor when a resource range supports the search interfaces. * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). */ enum { IORES_DESC_NONE = 0, IORES_DESC_CRASH_KERNEL = 1, IORES_DESC_ACPI_TABLES = 2, IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, IORES_DESC_SOFT_RESERVED = 8, }; /* * Flags controlling ioremap() behavior. */ enum { IORES_MAP_SYSTEM_RAM = BIT(0), IORES_MAP_ENCRYPTED = BIT(1), }; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ DEFINE_RES_IO_NAMED((_start), (_size), NULL) #define DEFINE_RES_MEM_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_MEM) #define DEFINE_RES_MEM(_start, _size) \ DEFINE_RES_MEM_NAMED((_start), (_size), NULL) #define DEFINE_RES_IRQ_NAMED(_irq, _name) \ DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ) #define DEFINE_RES_IRQ(_irq) \ DEFINE_RES_IRQ_NAMED((_irq), NULL) #define DEFINE_RES_DMA_NAMED(_dma, _name) \ DEFINE_RES_NAMED((_dma), 1, (_name), IORESOURCE_DMA) #define DEFINE_RES_DMA(_dma) \ DEFINE_RES_DMA_NAMED((_dma), NULL) /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; extern struct resource *request_resource_conflict(struct resource *root, struct resource *new); extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); void release_child_resources(struct resource *new); extern void reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name); extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t (*alignf)(void *, const struct resource *, resource_size_t, resource_size_t), void *alignf_data); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; } static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } static inline unsigned long resource_ext_type(const struct resource *res) { return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET) return false; return r1->start <= r2->start && r1->end >= r2->end; } /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) #define request_mem_region_exclusive(start,n,name) \ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, resource_size_t n, const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) #define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE extern void release_mem_region_adjustable(resource_size_t, resource_size_t); #endif #ifdef CONFIG_MEMORY_HOTPLUG extern void merge_system_ram_resource(struct resource *res); #endif /* Wrappers for managed devices */ struct device; extern int devm_request_resource(struct device *dev, struct resource *root, struct resource *new); extern void devm_release_resource(struct device *dev, struct resource *new); #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ __devm_request_region(dev, &iomem_resource, (start), (n), (name)) extern struct resource * __devm_request_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n, const char *name); #define devm_release_region(dev, start, n) \ __devm_release_region(dev, &ioport_resource, (start), (n)) #define devm_release_mem_region(dev, start, n) \ __devm_release_region(dev, &iomem_resource, (start), (n)) extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int walk_mem_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) { return (r1->start <= r2->end && r1->end >= r2->start); } struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res); #else static inline void revoke_devmem(struct resource *res) { }; #endif #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev_domain(async_func_t func, struct device *dev, struct async_domain *domain) { return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch. Keys that are * initially true or false can be using in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) #ifdef CONFIG_JUMP_LABEL struct static_key { atomic_t enabled; /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified. * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; }; #else struct static_key { atomic_t enabled; }; #endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry) { entry->key |= 2; } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static inline int static_key_count(struct static_key *key) { return atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely(static_key_count(key) > 0)) return true; return false; } static inline void static_key_slow_inc(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline int jump_label_apply_nops(struct module *mod) { return 0; } static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. */ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). */ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H #include <linux/uaccess.h> #include <linux/ptrace.h> #include <asm/cpu_entry_area.h> #include <asm/switch_to.h> enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; struct stack_info { enum stack_type type; unsigned long *begin, *end, *next_sp; }; bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, struct stack_info *info); const char *stack_type_name(enum stack_type type); static inline bool on_stack(struct stack_info *info, void *addr, size_t len) { void *begin = info->begin; void *end = info->end; return (info->type != STACK_TYPE_UNKNOWN && addr >= begin && addr < end && addr + len > begin && addr + len <= end); } #ifdef CONFIG_X86_32 #define STACKSLOTS_PER_LINE 8 #else #define STACKSLOTS_PER_LINE 4 #endif #ifdef CONFIG_FRAME_POINTER static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); return &((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { return NULL; } #endif /* CONFIG_FRAME_POINTER */ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); return (unsigned long *)task->thread.sp; } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, const char *log_lvl); /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; unsigned long return_address; }; struct stack_frame_ia32 { u32 next_frame; u32 return_address; }; void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UACCESS_64_H #define _ASM_X86_UACCESS_64_H /* * User space memory access functions */ #include <linux/compiler.h> #include <linux/lockdep.h> #include <linux/kasan-checks.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> /* * Copy To/From Userspace */ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_unrolled(void *to, const void *from, unsigned len); static __always_inline __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len) { unsigned ret; /* * If CPU has ERMS feature, use copy_user_enhanced_fast_string. * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. * Otherwise, use copy_user_generic_unrolled. */ alternative_call_2(copy_user_generic_unrolled, copy_user_generic_string, X86_FEATURE_REP_GOOD, copy_user_enhanced_fast_string, X86_FEATURE_ERMS, ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), "=d" (len)), "1" (to), "2" (from), "3" (len) : "memory", "rcx", "r8", "r9", "r10", "r11"); return ret; } static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { return copy_user_generic((__force void *)dst, src, size); } static __always_inline __must_check unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) { return copy_user_generic((__force void *)dst, (__force void *)src, size); } extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 0); } static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); return __copy_user_flushcache(dst, src, size); } #endif /* _ASM_X86_UACCESS_64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CFG802154_RDEV_OPS #define __CFG802154_RDEV_OPS #include <net/cfg802154.h> #include "core.h" #include "trace.h" static inline struct net_device * rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, const char *name, unsigned char name_assign_type, int type) { return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name, name_assign_type, type); } static inline void rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, struct net_device *dev) { rdev->ops->del_virtual_intf_deprecated(&rdev->wpan_phy, dev); } static inline int rdev_suspend(struct cfg802154_registered_device *rdev) { int ret; trace_802154_rdev_suspend(&rdev->wpan_phy); ret = rdev->ops->suspend(&rdev->wpan_phy); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_resume(struct cfg802154_registered_device *rdev) { int ret; trace_802154_rdev_resume(&rdev->wpan_phy); ret = rdev->ops->resume(&rdev->wpan_phy); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { int ret; trace_802154_rdev_add_virtual_intf(&rdev->wpan_phy, name, type, extended_addr); ret = rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, name_assign_type, type, extended_addr); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_del_virtual_intf(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { int ret; trace_802154_rdev_del_virtual_intf(&rdev->wpan_phy, wpan_dev); ret = rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) { int ret; trace_802154_rdev_set_channel(&rdev->wpan_phy, page, channel); ret = rdev->ops->set_channel(&rdev->wpan_phy, page, channel); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_cca_mode(struct cfg802154_registered_device *rdev, const struct wpan_phy_cca *cca) { int ret; trace_802154_rdev_set_cca_mode(&rdev->wpan_phy, cca); ret = rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_cca_ed_level(struct cfg802154_registered_device *rdev, s32 ed_level) { int ret; trace_802154_rdev_set_cca_ed_level(&rdev->wpan_phy, ed_level); ret = rdev->ops->set_cca_ed_level(&rdev->wpan_phy, ed_level); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_tx_power(struct cfg802154_registered_device *rdev, s32 power) { int ret; trace_802154_rdev_set_tx_power(&rdev->wpan_phy, power); ret = rdev->ops->set_tx_power(&rdev->wpan_phy, power); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) { int ret; trace_802154_rdev_set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); ret = rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_short_addr(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 short_addr) { int ret; trace_802154_rdev_set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); ret = rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be) { int ret; trace_802154_rdev_set_backoff_exponent(&rdev->wpan_phy, wpan_dev, min_be, max_be); ret = rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev, min_be, max_be); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, u8 max_csma_backoffs) { int ret; trace_802154_rdev_set_csma_backoffs(&rdev->wpan_phy, wpan_dev, max_csma_backoffs); ret = rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev, max_csma_backoffs); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, s8 max_frame_retries) { int ret; trace_802154_rdev_set_max_frame_retries(&rdev->wpan_phy, wpan_dev, max_frame_retries); ret = rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev, max_frame_retries); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_lbt_mode(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, bool mode) { int ret; trace_802154_rdev_set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); ret = rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } static inline int rdev_set_ackreq_default(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, bool ackreq) { int ret; trace_802154_rdev_set_ackreq_default(&rdev->wpan_phy, wpan_dev, ackreq); ret = rdev->ops->set_ackreq_default(&rdev->wpan_phy, wpan_dev, ackreq); trace_802154_rdev_return_int(&rdev->wpan_phy, ret); return ret; } #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL /* TODO this is already a nl802154, so move into ieee802154 */ static inline void rdev_get_llsec_table(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, struct ieee802154_llsec_table **table) { rdev->ops->get_llsec_table(&rdev->wpan_phy, wpan_dev, table); } static inline void rdev_lock_llsec_table(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { rdev->ops->lock_llsec_table(&rdev->wpan_phy, wpan_dev); } static inline void rdev_unlock_llsec_table(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { rdev->ops->unlock_llsec_table(&rdev->wpan_phy, wpan_dev); } static inline int rdev_get_llsec_params(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, struct ieee802154_llsec_params *params) { return rdev->ops->get_llsec_params(&rdev->wpan_phy, wpan_dev, params); } static inline int rdev_set_llsec_params(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_params *params, u32 changed) { return rdev->ops->set_llsec_params(&rdev->wpan_phy, wpan_dev, params, changed); } static inline int rdev_add_llsec_key(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_key_id *id, const struct ieee802154_llsec_key *key) { return rdev->ops->add_llsec_key(&rdev->wpan_phy, wpan_dev, id, key); } static inline int rdev_del_llsec_key(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_key_id *id) { return rdev->ops->del_llsec_key(&rdev->wpan_phy, wpan_dev, id); } static inline int rdev_add_seclevel(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_seclevel *sl) { return rdev->ops->add_seclevel(&rdev->wpan_phy, wpan_dev, sl); } static inline int rdev_del_seclevel(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_seclevel *sl) { return rdev->ops->del_seclevel(&rdev->wpan_phy, wpan_dev, sl); } static inline int rdev_add_device(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_device *dev_desc) { return rdev->ops->add_device(&rdev->wpan_phy, wpan_dev, dev_desc); } static inline int rdev_del_device(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le64 extended_addr) { return rdev->ops->del_device(&rdev->wpan_phy, wpan_dev, extended_addr); } static inline int rdev_add_devkey(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le64 extended_addr, const struct ieee802154_llsec_device_key *devkey) { return rdev->ops->add_devkey(&rdev->wpan_phy, wpan_dev, extended_addr, devkey); } static inline int rdev_del_devkey(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le64 extended_addr, const struct ieee802154_llsec_device_key *devkey) { return rdev->ops->del_devkey(&rdev->wpan_phy, wpan_dev, extended_addr, devkey); } #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ #endif /* __CFG802154_RDEV_OPS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 /* SPDX-License-Identifier: GPL-2.0 */ /* * Block data types and constants. Directly include this file only to * break include dependency loop. */ #ifndef __LINUX_BLK_TYPES_H #define __LINUX_BLK_TYPES_H #include <linux/types.h> #include <linux/bvec.h> #include <linux/ktime.h> struct bio_set; struct bio; struct bio_integrity_payload; struct page; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; void * bd_holder; int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; /* The counter of freeze processes */ int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; } __randomize_layout; /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. */ #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) typedef u32 __bitwise blk_status_t; #else typedef u8 __bitwise blk_status_t; #endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) #define BLK_STS_TIMEOUT ((__force blk_status_t)2) #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) #define BLK_STS_NEXUS ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) #define BLK_STS_IOERR ((__force blk_status_t)10) /* hack for device mapper, don't use elsewhere: */ #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) #define BLK_STS_AGAIN ((__force blk_status_t)12) /* * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if * device related resources are unavailable, but the driver can guarantee * that the queue will be rerun in the future once resources become * available again. This is typically the case for device specific * resources that are consumed for IO. If the driver fails allocating these * resources, we know that inflight (or pending) IO will free these * resource upon completion. * * This is different from BLK_STS_RESOURCE in that it explicitly references * a device specific resource. For resources of wider scope, allocation * failure can happen without having pending IO. This means that we can't * rely on request completions freeing these resources, as IO may not be in * flight. Examples of that are kernel memory allocations, DMA mappings, or * any other system wide resources. */ #define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) /* * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone * related resources are unavailable, but the driver can guarantee the queue * will be rerun in the future once the resources become available again. * * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references * a zone specific resource and IO to a different zone on the same device could * still be served. Examples of that are zones that are write-locked, but a read * to the same zone could be served. */ #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) /* * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion * path if the device returns a status indicating that too many zone resources * are currently open. The same command should be successful if resubmitted * after the number of open zones decreases below the device's limits, which is * reported in the request_queue's max_open_zones. */ #define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) /* * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion * path if the device returns a status indicating that too many zone resources * are currently active. The same command should be successful if resubmitted * after the number of active zones decreases below the device's limits, which * is reported in the request_queue's max_active_zones. */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with * * Description: * This classifies block error status into non-retryable errors and ones * that may be successful if retried on a failover path. * * Return: * %false - retrying failover path will not help * %true - may succeed if retried */ static inline bool blk_path_error(blk_status_t error) { switch (error) { case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: case BLK_STS_NEXUS: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; } /* Anything else could be a path failure, so should be retried */ return true; } /* * From most significant bit: * 1 bit: reserved for other usage, see below * 12 bits: original size of bio * 51 bits: issue time of bio */ #define BIO_ISSUE_RES_BITS 1 #define BIO_ISSUE_SIZE_BITS 12 #define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) #define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) #define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) #define BIO_ISSUE_SIZE_MASK \ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) #define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) /* Reserved bit for blk-throtl */ #define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) struct bio_issue { u64 value; }; static inline u64 __bio_issue_time(u64 time) { return time & BIO_ISSUE_TIME_MASK; } static inline u64 bio_issue_time(struct bio_issue *issue) { return __bio_issue_time(issue->value); } static inline sector_t bio_issue_size(struct bio_issue *issue) { return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); } static inline void bio_issue_init(struct bio_issue *issue, sector_t size) { size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) */ struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. */ unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; blk_status_t bi_status; u8 bi_partno; atomic_t __bi_remaining; struct bvec_iter bi_iter; bio_end_io_t *bi_end_io; void *bi_private; #ifdef CONFIG_BLK_CGROUP /* * Represents the association of the css and request_queue for the bio. * If a bio goes direct to device, it will not have a blkg as it will * not have a request_queue associated with it. The reference is put * on release of the bio. */ struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #ifdef CONFIG_BLK_CGROUP_IOCOST u64 bi_iocost_cost; #endif #endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *bi_crypt_context; #endif union { #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif }; unsigned short bi_vcnt; /* how many bio_vec's */ /* * Everything starting with bi_max_vecs will be preserved by bio_reset() */ unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ struct bio_set *bi_pool; /* * We can inline a number of vecs at the end of the bio, to avoid * double allocations for a small number of bio_vecs. This member * MUST obviously be kept at the very end of the bio. */ struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) /* * bio flags */ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ BIO_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_TRACKED, /* set if bio goes through the rq_qos path */ BIO_FLAG_LAST }; /* See BVEC_POOL_OFFSET below before adding new flags */ /* * We support 6 different bvec pools, the last one is magic in that it * is backed by a mempool. */ #define BVEC_POOL_NR 6 #define BVEC_POOL_MAX (BVEC_POOL_NR - 1) /* * Top 3 bits of bio flags indicate the pool the bvecs came from. We add * 1 to the actual index so that 0 indicates that there are no bvecs to be * freed. */ #define BVEC_POOL_BITS (3) #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) #if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1) # error "BVEC_POOL_BITS is too small" #endif /* * Flags starting here get preserved by bio_reset() - this includes * only BVEC_POOL_IDX() */ #define BIO_RESET_BITS BVEC_POOL_OFFSET typedef __u32 __bitwise blk_mq_req_flags_t; /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. * * The least significant bit of the operation number indicates the data * transfer direction: * * - if the least significant bit is set transfers are TO the device * - if the least significant bit is not set transfers are FROM the device * * If a operation does not transfer data the least significant bit has no * meaning. */ #define REQ_OP_BITS 8 #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 enum req_opf { /* read sectors from the device */ REQ_OP_READ = 0, /* write sectors to the device */ REQ_OP_WRITE = 1, /* flush the volatile write cache */ REQ_OP_FLUSH = 2, /* discard sectors */ REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ REQ_OP_ZONE_OPEN = 10, /* Close a zone */ REQ_OP_ZONE_CLOSE = 11, /* Transition a zone to full */ REQ_OP_ZONE_FINISH = 12, /* write data at the current zone write pointer */ REQ_OP_ZONE_APPEND = 13, /* reset a zone write pointer */ REQ_OP_ZONE_RESET = 15, /* reset all the zone present on the device */ REQ_OP_ZONE_RESET_ALL = 17, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, REQ_OP_SCSI_OUT = 33, /* Driver private requests */ REQ_OP_DRV_IN = 34, REQ_OP_DRV_OUT = 35, REQ_OP_LAST, }; enum req_flag_bits { __REQ_FAILFAST_DEV = /* no driver retries of device errors */ REQ_OP_BITS, __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_IDLE, /* anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ __REQ_NOWAIT, /* Don't wait if request will block */ /* * When a shared kthread needs to issue a bio for a cgroup, doing * so synchronously can lead to priority inversions as the kthread * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes * submit_bio() punt the actual issuing to a dedicated per-blkcg * work item to avoid such priority inversions. */ __REQ_CGROUP_PUNT, /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ __REQ_HIPRI, /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ __REQ_NR_BITS, /* stops here */ }; #define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) #define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) #define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1ULL << __REQ_SYNC) #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) #define REQ_IDLE (1ULL << __REQ_IDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) #define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) enum stat_group { STAT_READ, STAT_WRITE, STAT_DISCARD, STAT_FLUSH, NR_STAT_GROUPS }; #define bio_op(bio) \ ((bio)->bi_opf & REQ_OP_MASK) #define req_op(req) \ ((req)->cmd_flags & REQ_OP_MASK) /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, unsigned op_flags) { bio->bi_opf = op | op_flags; } static inline bool op_is_write(unsigned int op) { return (op & 1); } /* * Check if the bio or request is one that needs special treatment in the * flush state machine. */ static inline bool op_is_flush(unsigned int op) { return op & (REQ_FUA | REQ_PREFLUSH); } /* * Reads are always treated as synchronous, as are requests with the FUA or * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. */ static inline bool op_is_sync(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } static inline bool op_is_discard(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } /* * Check if a bio or request operation is a zone management operation, with * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case * due to its different handling in the block layer and device response in * case of command failure. */ static inline bool op_is_zone_mgmt(enum req_opf op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: return true; default: return false; } } static inline int op_stat_group(unsigned int op) { if (op_is_discard(op)) return STAT_DISCARD; return op_is_write(op); } typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U #define BLK_QC_T_SHIFT 16 #define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { return cookie != BLK_QC_T_NONE; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; } static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) { return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } static inline bool blk_qc_t_is_internal(blk_qc_t cookie) { return (cookie & BLK_QC_T_INTERNAL) != 0; } struct blk_rq_stat { u64 mean; u64 min; u64 max; u32 nr_samples; u64 batch; }; #endif /* __LINUX_BLK_TYPES_H */
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #include <linux/dcache.h> #include <linux/fs.h> #include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/srcu.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" /* * Clear all of the marks on an inode when it is being evicted from core */ void __fsnotify_inode_delete(struct inode *inode) { fsnotify_clear_marks_by_inode(inode); } EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); void __fsnotify_vfsmount_delete(struct vfsmount *mnt) { fsnotify_clear_marks_by_mount(mnt); } /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. * * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); continue; } /* * If i_count is zero, the inode cannot have any watches and * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. * However, we should have been called /after/ evict_inodes * removed all zero refcount inodes, in any case. Test to * be sure. */ if (!atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); if (iput_inode) iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); fsnotify_inode_delete(inode); iput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); if (iput_inode) iput(iput_inode); /* Wait for outstanding inode references from connectors */ wait_var_event(&sb->s_fsnotify_inode_refs, !atomic_long_read(&sb->s_fsnotify_inode_refs)); } void fsnotify_sb_delete(struct super_block *sb) { fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); } /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the * parent cares. Thus when an event happens on a child it can quickly tell if * if there is a need to find a parent and send the event to the parent. */ void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; int watched; if (!S_ISDIR(inode->i_mode)) return; /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_child) { if (!child->d_inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); } /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt, __u32 mask) { __u32 marks_mask = 0; /* We only send parent/name to inode/sb/mount for events on non-dir */ if (mask & FS_ISDIR) return false; /* * All events that are possible on child can also may be reported with * parent/name info to inode/sb/mount. Otherwise, a watching parent * could result in events reported with unexpected name info to sb/mount. */ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); if (mnt) marks_mask |= fsnotify_parent_needed_mask(mnt->mnt_fsnotify_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with * parent and name info. * * Notify only the child without name info if parent is not watching and * inode/sb/mount are not interested in events with parent and name info. */ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); struct mount *mnt = path ? real_mount(path->mnt) : NULL; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; struct qstr *file_name = NULL; int ret = 0; /* * Do inode/sb/mount care about parent and name info on non-dir? * Do they care about any event at all? */ if (!inode->i_fsnotify_marks && !inode->i_sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && !parent_watched) return 0; parent = NULL; parent_needed = fsnotify_event_needs_parent(inode, mnt, mask); if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ parent = dget_parent(dentry); p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) __fsnotify_update_child_dentry_flags(p_inode); /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. */ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; if (parent_interested) mask |= FS_EVENT_ON_CHILD; } notify: ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0); if (file_name) release_dentry_name_snapshot(&name); dput(parent); return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); static int fsnotify_handle_inode_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct inode *inode = fsnotify_data_inode(data, data_type); const struct fsnotify_ops *ops = group->ops; if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; if ((inode_mark->mask & FS_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; /* Check interest of this mark in case event was sent with two marks */ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) return 0; return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); } static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; if (parent_mark) { /* * parent_mark indicates that the parent inode is watching * children and interested in this event, which is an event * possible on child. But is *this mark* watching children and * interested in this event? */ if (parent_mark->mask & FS_EVENT_ON_CHILD) { ret = fsnotify_handle_inode_event(group, parent_mark, mask, data, data_type, dir, name, 0); if (ret) return ret; } if (!inode_mark) return 0; } if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks * (e.g. FS_ATTRIB). If both parent dir and child are * watching, report the event once to parent dir with name (if * interested) and once to child without name (if interested). * The child watcher is expecting an event without a file name * and without the FS_EVENT_ON_CHILD flag. */ mask &= ~FS_EVENT_ON_CHILD; dir = NULL; name = NULL; } return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; int type; if (WARN_ON(!iter_info->report_mask)) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { fsnotify_foreach_obj_type(type) { if (!fsnotify_iter_should_report_type(iter_info, type)) continue; mark = iter_info->marks[type]; if (mark && !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignored_mask = 0; } } fsnotify_foreach_obj_type(type) { if (!fsnotify_iter_should_report_type(iter_info, type)) continue; mark = iter_info->marks[type]; /* does the object mark tell us to do something? */ if (mark) { group = mark->group; marks_mask |= mark->mask; marks_ignored_mask |= mark->ignored_mask; } } pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", __func__, group, mask, marks_mask, marks_ignored_mask, data, data_type, dir, cookie); if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; if (group->ops->handle_event) { return group->ops->handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } return fsnotify_handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; conn = srcu_dereference(*connp, &fsnotify_mark_srcu); if (conn) node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; if (mark) node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } /* * iter_info is a multi head priority queue of marks. * Pick a subset of marks from queue heads, all with the * same group and set the report_mask for selected subset. * Returns the report_mask of the selected subset. */ static unsigned int fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; struct fsnotify_mark *mark; int type; /* Choose max prio group among groups of all queue heads */ fsnotify_foreach_obj_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) max_prio_group = mark->group; } if (!max_prio_group) return 0; /* Set the report mask for marks from same group as max prio group */ iter_info->report_mask = 0; fsnotify_foreach_obj_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) == 0) fsnotify_iter_set_report_type(iter_info, type); } return iter_info->report_mask; } /* * Pop from iter_info multi head queue, the marks that were iterated in the * current iteration step. */ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { int type; fsnotify_foreach_obj_type(type) { if (fsnotify_iter_should_report_type(iter_info, type)) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } } /* * fsnotify - This is the main call to fsnotify. * * The VFS calls into hook specific functions in linux/fsnotify.h. * Those functions then in turn call here. Here will call out to all of the * registered fsnotify_group. Those groups can then use the notification event * in whatever means they feel necessary. * * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - * either @dir or @inode must be non-NULL. * if both are non-NULL event may be reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct fsnotify_iter_info iter_info = {}; struct super_block *sb; struct mount *mnt = NULL; struct inode *parent = NULL; int ret = 0; __u32 test_mask, marks_mask; if (path) mnt = real_mount(path->mnt); if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. */ parent = dir; } sb = inode->i_sb; /* * Optimization: srcu_read_lock() has a memory barrier which can * be expensive. It protects walking the *_fsnotify_marks lists. * However, if we do not walk the lists, we do not have to do * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!parent || !parent->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; if (mnt) marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; if (parent) marks_mask |= parent->i_fsnotify_mask; /* * if this is a modify event we may need to clear the ignored masks * otherwise return if none of the marks care about this type of event. */ test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (!(mask & FS_MODIFY) && !(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } if (parent) { iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] = fsnotify_first_mark(&parent->i_fsnotify_marks); } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { ret = send_to_group(mask, data, data_type, dir, file_name, cookie, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; fsnotify_iter_next(&iter_info); } ret = 0; out: srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); static __init int fsnotify_init(void) { int ret; BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) panic("initializing fsnotify_mark_srcu"); fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, SLAB_PANIC); return 0; } core_initcall(fsnotify_init);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0 */ /* * Internal header to deal with irq_desc->status which will be renamed * to irq_desc->settings. */ enum { _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS, _IRQ_PER_CPU = IRQ_PER_CPU, _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; #define IRQ_PER_CPU GOT_YOU_MORON #define IRQ_NO_BALANCING GOT_YOU_MORON #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON #define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON static inline void irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) { desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); } static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU; } static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; } static inline void irq_settings_set_per_cpu(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_PER_CPU; } static inline void irq_settings_set_no_balancing(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_BALANCING; } static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_BALANCING; } static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) { return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; } static inline void irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) { desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; } static inline bool irq_settings_is_level(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_LEVEL; } static inline void irq_settings_clr_level(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_LEVEL; } static inline void irq_settings_set_level(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_LEVEL; } static inline bool irq_settings_can_request(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOREQUEST); } static inline void irq_settings_clr_norequest(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOREQUEST; } static inline void irq_settings_set_norequest(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOREQUEST; } static inline bool irq_settings_can_thread(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOTHREAD); } static inline void irq_settings_clr_nothread(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOTHREAD; } static inline void irq_settings_set_nothread(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOTHREAD; } static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); } static inline void irq_settings_clr_noprobe(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOPROBE; } static inline void irq_settings_set_noprobe(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOPROBE; } static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; } static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); } static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NESTED_THREAD; } static inline bool irq_settings_is_polled(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_IS_POLLED; } static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; } static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; } static inline bool irq_settings_is_hidden(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_HIDDEN; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 /* * DRBG based on NIST SP800-90A * * Copyright Stephan Mueller <smueller@chronox.de>, 2014 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #ifndef _DRBG_H #define _DRBG_H #include <linux/random.h> #include <linux/scatterlist.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/slab.h> #include <crypto/internal/rng.h> #include <crypto/rng.h> #include <linux/fips.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/workqueue.h> /* * Concatenation Helper and string operation helper * * SP800-90A requires the concatenation of different data. To avoid copying * buffers around or allocate additional memory, the following data structure * is used to point to the original memory with its size. In addition, it * is used to build a linked list. The linked list defines the concatenation * of individual buffers. The order of memory block referenced in that * linked list determines the order of concatenation. */ struct drbg_string { const unsigned char *buf; size_t len; struct list_head list; }; static inline void drbg_string_fill(struct drbg_string *string, const unsigned char *buf, size_t len) { string->buf = buf; string->len = len; INIT_LIST_HEAD(&string->list); } struct drbg_state; typedef uint32_t drbg_flag_t; struct drbg_core { drbg_flag_t flags; /* flags for the cipher */ __u8 statelen; /* maximum state length */ __u8 blocklen_bytes; /* block size of output in bytes */ char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */ /* kernel crypto API backend cipher name */ char backend_cra_name[CRYPTO_MAX_ALG_NAME]; }; struct drbg_state_ops { int (*update)(struct drbg_state *drbg, struct list_head *seed, int reseed); int (*generate)(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl); int (*crypto_init)(struct drbg_state *drbg); int (*crypto_fini)(struct drbg_state *drbg); }; struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ unsigned char *Vbuf; /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; unsigned char *Cbuf; /* Number of RNG requests since last reseed -- 10.1.1.1 1c) */ size_t reseed_ctr; size_t reseed_threshold; /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; unsigned char *scratchpadbuf; void *priv_data; /* Cipher handle */ struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->statelen; return 0; } static inline __u8 drbg_blocklen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->blocklen_bytes; return 0; } static inline __u8 drbg_keylen(struct drbg_state *drbg) { if (drbg && drbg->core) return (drbg->core->statelen - drbg->core->blocklen_bytes); return 0; } static inline size_t drbg_max_request_bytes(struct drbg_state *drbg) { /* SP800-90A requires the limit 2**19 bits, but we return bytes */ return (1 << 16); } static inline size_t drbg_max_addtl(struct drbg_state *drbg) { /* SP800-90A requires 2**35 bytes additional info str / pers str */ #if (__BITS_PER_LONG == 32) /* * SP800-90A allows smaller maximum numbers to be returned -- we * return SIZE_MAX - 1 to allow the verification of the enforcement * of this value in drbg_healthcheck_sanity. */ return (SIZE_MAX - 1); #else return (1UL<<35); #endif } static inline size_t drbg_max_requests(struct drbg_state *drbg) { /* SP800-90A requires 2**48 maximum requests before reseeding */ return (1<<20); } /* * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data. * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl) { return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data and * allow furnishing of test_data * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_reset() to allow the caller to provide test_data * * @drng DRBG handle -- see crypto_rng_reset * @pers personalization string input buffer * @perslen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_reset */ static inline int crypto_drbg_reset_test(struct crypto_rng *drng, struct drbg_string *pers, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_reset(drng, pers->buf, pers->len); } /* DRBG type flags */ #define DRBG_CTR ((drbg_flag_t)1<<0) #define DRBG_HMAC ((drbg_flag_t)1<<1) #define DRBG_HASH ((drbg_flag_t)1<<2) #define DRBG_TYPE_MASK (DRBG_CTR | DRBG_HMAC | DRBG_HASH) /* DRBG strength flags */ #define DRBG_STRENGTH128 ((drbg_flag_t)1<<3) #define DRBG_STRENGTH192 ((drbg_flag_t)1<<4) #define DRBG_STRENGTH256 ((drbg_flag_t)1<<5) #define DRBG_STRENGTH_MASK (DRBG_STRENGTH128 | DRBG_STRENGTH192 | \ DRBG_STRENGTH256) enum drbg_prefixes { DRBG_PREFIX0 = 0x00, DRBG_PREFIX1, DRBG_PREFIX2, DRBG_PREFIX3 }; #endif /* _DRBG_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * AEAD: Authenticated Encryption with Associated Data * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/slab.h> /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API * * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD * (listed as type "aead" in /proc/crypto) * * The most prominent examples for this type of encryption is GCM and CCM. * However, the kernel supports other types of AEAD ciphers which are defined * with the following cipher string: * * authenc(keyed message digest, block cipher) * * For example: authenc(hmac(sha256), cbc(aes)) * * The example code provided for the symmetric key cipher operation * applies here as well. Naturally all *skcipher* symbols must be exchanged * the *aead* pendants discussed in the following. In addition, for the AEAD * operation, the aead_request_set_ad function must be used to set the * pointer to the associated data memory location before performing the * encryption or decryption operation. In case of an encryption, the associated * data memory is filled during the encryption operation. For decryption, the * associated data memory must contain data that is used to verify the integrity * of the decrypted data. Another deviation from the asynchronous block cipher * operation is that the caller should explicitly check for -EBADMSG of the * crypto_aead_decrypt. That error indicates an authentication error, i.e. * a breach in the integrity of the message. In essence, that -EBADMSG error * code is the key bonus an AEAD cipher has over "standard" block chaining * modes. * * Memory Structure: * * The source scatterlist must contain the concatenation of * associated data || plaintext or ciphertext. * * The destination scatterlist has the same layout, except that the plaintext * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size * during encryption (resp. decryption). * * In-place encryption/decryption is enabled by using the same scatterlist * pointer for both the source and destination. * * Even in the out-of-place case, space must be reserved in the destination for * the associated data, even though it won't be written to. This makes the * in-place and out-of-place cases more consistent. It is permissible for the * "destination" associated data to alias the "source" associated data. * * As with the other scatterlist crypto APIs, zero-length scatterlist elements * are not allowed in the used part of the scatterlist. Thus, if there is no * associated data, the first element must point to the plaintext/ciphertext. * * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes * of the associated data buffer must contain a second copy of the IV. This is * in addition to the copy passed to aead_request_set_crypt(). These two IV * copies must not differ; different implementations of the same algorithm may * behave differently in that case. Note that the algorithm might not actually * treat the IV as associated data; nevertheless the length passed to * aead_request_set_ad() must include it. */ struct crypto_aead; /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests * @assoclen: Length in bytes of associated data for authentication * @cryptlen: Length of data to be encrypted or decrypted * @iv: Initialisation vector * @src: Source data * @dst: Destination data * @__ctx: Start of private context data */ struct aead_request { struct crypto_async_request base; unsigned int assoclen; unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the * integrity of the encrypted data, a consumer typically wants the * largest authentication tag possible as defined by this * variable. * @setauthsize: Set authentication size for the AEAD transformation. This * function is used to specify the consumer requested size of the * authentication tag to be either generated by the transformation * during encryption or the size of the authentication tag to be * supplied during the decryption operation. This function is also * responsible for checking the authentication tag size for * validity. * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @base: Definition of a generic crypto cipher algorithm. * * All fields except @ivsize is mandatory and must be filled. */ struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); int (*encrypt)(struct aead_request *req); int (*decrypt)(struct aead_request *req); int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; struct crypto_alg base; }; struct crypto_aead { unsigned int authsize; unsigned int reqsize; struct crypto_tfm base; }; static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_aead, base); } /** * crypto_alloc_aead() - allocate AEAD cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * AEAD cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an AEAD. The returned struct * crypto_aead is the cipher handle that is required for any subsequent * API invocation for that AEAD. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) { return &tfm->base; } /** * crypto_free_aead() - zeroize and free aead handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_aead(struct crypto_aead *tfm) { crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); } static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) { return container_of(crypto_aead_tfm(tfm)->__crt_alg, struct aead_alg, base); } static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg) { return alg->ivsize; } /** * crypto_aead_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the aead referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) { return crypto_aead_alg_ivsize(crypto_aead_alg(tfm)); } /** * crypto_aead_authsize() - obtain maximum authentication data size * @tfm: cipher handle * * The maximum size of the authentication data for the AEAD cipher referenced * by the AEAD cipher handle is returned. The authentication data size may be * zero if the cipher implements a hard-coded maximum. * * The authentication data may also be known as "tag value". * * Return: authentication data size / tag size in bytes */ static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) { return tfm->authsize; } static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg) { return alg->maxauthsize; } static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead) { return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead)); } /** * crypto_aead_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the AEAD referenced with the cipher handle is returned. * The caller may use that information to allocate appropriate memory for the * data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) { return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); } static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) { return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); } static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) { return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); } static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); } static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); } /** * crypto_aead_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the AEAD referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); /** * crypto_aead_setauthsize() - set authentication data size * @tfm: cipher handle * @authsize: size of the authentication data / tag in bytes * * Set the authentication data size / tag size. AEAD requires an authentication * tag (or MAC) in addition to the associated data. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) { return __crypto_aead_cast(req->base.tfm); } /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The encryption operation creates the authentication data / * tag. That data is concatenated with the created ciphertext. * The ciphertext memory size is therefore the given number of * block cipher blocks + the size defined by the * crypto_aead_setauthsize invocation. The caller must ensure * that sufficient memory is available for the ciphertext and * the authentication tag. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_aead_encrypt(struct aead_request *req); /** * crypto_aead_decrypt() - decrypt ciphertext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the * authentication data / tag. That authentication data / tag * must have the size defined by the crypto_aead_setauthsize * invocation. * * * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD * cipher operation performs the authentication of the data during the * decryption operation. Therefore, the function returns this error if * the authentication of the ciphertext was unsuccessful (i.e. the * integrity of the ciphertext or the associated data was violated); * < 0 if an error occurred. */ int crypto_aead_decrypt(struct aead_request *req); /** * DOC: Asynchronous AEAD Request Handle * * The aead_request data structure contains all pointers to data required for * the AEAD cipher operation. This includes the cipher handle (which can be * used by multiple aead_request instances), pointer to plaintext and * ciphertext, asynchronous callback function, etc. It acts as a handle to the * aead_request_* API calls in a similar way as AEAD handle to the * crypto_aead_* API calls. */ /** * crypto_aead_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) { return tfm->reqsize; } /** * aead_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing aead handle in the request * data structure with a different one. */ static inline void aead_request_set_tfm(struct aead_request *req, struct crypto_aead *tfm) { req->base.tfm = crypto_aead_tfm(tfm); } /** * aead_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the AEAD * encrypt and decrypt API calls. During the allocation, the provided aead * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, gfp_t gfp) { struct aead_request *req; req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); if (likely(req)) aead_request_set_tfm(req, tfm); return req; } /** * aead_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void aead_request_free(struct aead_request *req) { kfree_sensitive(req); } /** * aead_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * Setting the callback function that is triggered once the cipher operation * completes * * The callback function is registered with the aead_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void aead_request_set_callback(struct aead_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * aead_request_set_crypt - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_aead_ivsize() * * Setting the source data and destination data scatter / gather lists which * hold the associated data concatenated with the plaintext or ciphertext. See * below for the authentication tag. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. * * The memory structure for cipher operation has the following structure: * * - AEAD encryption input: assoc data || plaintext * - AEAD encryption output: assoc data || cipherntext || auth tag * - AEAD decryption input: assoc data || ciphertext || auth tag * - AEAD decryption output: assoc data || plaintext * * Albeit the kernel requires the presence of the AAD buffer, however, * the kernel does not fill the AAD buffer in the output case. If the * caller wants to have that data buffer filled, the caller must either * use an in-place cipher operation (i.e. same memory location for * input/output memory location). */ static inline void aead_request_set_crypt(struct aead_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, u8 *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } /** * aead_request_set_ad - set associated data information * @req: request handle * @assoclen: number of bytes in associated data * * Setting the AD information. This function sets the length of * the associated data. */ static inline void aead_request_set_ad(struct aead_request *req, unsigned int assoclen) { req->assoclen = assoclen; } #endif /* _CRYPTO_AEAD_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SOCK_REUSEPORT_H #define _SOCK_REUSEPORT_H #include <linux/filter.h> #include <linux/skbuff.h> #include <linux/types.h> #include <linux/spinlock.h> #include <net/sock.h> extern spinlock_t reuseport_lock; struct sock_reuseport { struct rcu_head rcu; u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ /* The last synq overflow event timestamp of this * reuse->socks[] group. */ unsigned int synq_overflow_ts; /* ID stays the same even after the size of socks[] grows. */ unsigned int reuseport_id; unsigned int bind_inany:1; unsigned int has_conns:1; struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[]; /* array of sock pointers */ }; extern int reuseport_alloc(struct sock *sk, bool bind_inany); extern int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany); extern void reuseport_detach_sock(struct sock *sk); extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash, struct sk_buff *skb, int hdr_len); extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); static inline bool reuseport_has_conns(struct sock *sk, bool set) { struct sock_reuseport *reuse; bool ret = false; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); if (reuse) { if (set) reuse->has_conns = 1; ret = reuse->has_conns; } rcu_read_unlock(); return ret; } #endif /* _SOCK_REUSEPORT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the UDP protocol. * * Version: @(#)udp.h 1.0.2 04/28/93 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_UDP_H #define _LINUX_UDP_H #include <net/inet_sock.h> #include <linux/skbuff.h> #include <net/netns/hash.h> #include <uapi/linux/udp.h> static inline struct udphdr *udp_hdr(const struct sk_buff *skb) { return (struct udphdr *)skb_transport_header(skb); } static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) { return (struct udphdr *)skb_inner_transport_header(skb); } #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { return (num + net_hash_mix(net)) & mask; } struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node int pending; /* Any pending frames ? */ unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; /* * Following member retains the information to create a UDP header * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ __u16 gso_size; /* * Fields specific to UDP-Lite. */ __u16 pcslen; __u16 pcrlen; /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ __u8 unused[3]; /* * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); /* GRO functions for UDP socket */ struct sk_buff * (*gro_receive)(struct sock *sk, struct list_head *head, struct sk_buff *skb); int (*gro_complete)(struct sock *sk, struct sk_buff *skb, int nhoff); /* udp_recvmsg try to use this before splicing sk_receive_queue */ struct sk_buff_head reader_queue ____cacheline_aligned_in_smp; /* This field is dirtied by udp_recvmsg() */ int forward_deficit; }; #define UDP_MAX_SEGMENTS (1 << 6UL) static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; } static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { udp_sk(sk)->no_check6_tx = val; } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { udp_sk(sk)->no_check6_rx = val; } static inline bool udp_get_no_check6_tx(struct sock *sk) { return udp_sk(sk)->no_check6_tx; } static inline bool udp_get_no_check6_rx(struct sock *sk) { return udp_sk(sk)->no_check6_rx; } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int gso_size; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { gso_size = skb_shinfo(skb)->gso_size; put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size); } } static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) return false; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) return true; if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) return true; return false; } #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Generic associative array implementation. * * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_ASSOC_ARRAY_H #define _LINUX_ASSOC_ARRAY_H #ifdef CONFIG_ASSOCIATIVE_ARRAY #include <linux/types.h> #define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */ /* * Generic associative array. */ struct assoc_array { struct assoc_array_ptr *root; /* The node at the root of the tree */ unsigned long nr_leaves_on_tree; }; /* * Operations on objects and index keys for use by array manipulation routines. */ struct assoc_array_ops { /* Method to get a chunk of an index key from caller-supplied data */ unsigned long (*get_key_chunk)(const void *index_key, int level); /* Method to get a piece of an object's index key */ unsigned long (*get_object_key_chunk)(const void *object, int level); /* Is this the object we're looking for? */ bool (*compare_object)(const void *object, const void *index_key); /* How different is an object from an index key, to a bit position in * their keys? (or -1 if they're the same) */ int (*diff_objects)(const void *object, const void *index_key); /* Method to free an object. */ void (*free_object)(void *object); }; /* * Access and manipulation functions. */ struct assoc_array_edit; static inline void assoc_array_init(struct assoc_array *array) { array->root = NULL; array->nr_leaves_on_tree = 0; } extern int assoc_array_iterate(const struct assoc_array *array, int (*iterator)(const void *object, void *iterator_data), void *iterator_data); extern void *assoc_array_find(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key); extern void assoc_array_destroy(struct assoc_array *array, const struct assoc_array_ops *ops); extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, void *object); extern void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object); extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key); extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, const struct assoc_array_ops *ops); extern void assoc_array_apply_edit(struct assoc_array_edit *edit); extern void assoc_array_cancel_edit(struct assoc_array_edit *edit); extern int assoc_array_gc(struct assoc_array *array, const struct assoc_array_ops *ops, bool (*iterator)(void *object, void *iterator_data), void *iterator_data); #endif /* CONFIG_ASSOCIATIVE_ARRAY */ #endif /* _LINUX_ASSOC_ARRAY_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM migrate #if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MIGRATE_H #include <linux/tracepoint.h> #define MIGRATE_MODE \ EM( MIGRATE_ASYNC, "MIGRATE_ASYNC") \ EM( MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT") \ EMe(MIGRATE_SYNC, "MIGRATE_SYNC") #define MIGRATE_REASON \ EM( MR_COMPACTION, "compaction") \ EM( MR_MEMORY_FAILURE, "memory_failure") \ EM( MR_MEMORY_HOTPLUG, "memory_hotplug") \ EM( MR_SYSCALL, "syscall_or_cpuset") \ EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ EMe(MR_CONTIG_RANGE, "contig_range") /* * First define the enums in the above macros to be exported to userspace * via TRACE_DEFINE_ENUM(). */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); MIGRATE_MODE MIGRATE_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a, b) {a, b}, #define EMe(a, b) {a, b} TRACE_EVENT(mm_migrate_pages, TP_PROTO(unsigned long succeeded, unsigned long failed, unsigned long thp_succeeded, unsigned long thp_failed, unsigned long thp_split, enum migrate_mode mode, int reason), TP_ARGS(succeeded, failed, thp_succeeded, thp_failed, thp_split, mode, reason), TP_STRUCT__entry( __field( unsigned long, succeeded) __field( unsigned long, failed) __field( unsigned long, thp_succeeded) __field( unsigned long, thp_failed) __field( unsigned long, thp_split) __field( enum migrate_mode, mode) __field( int, reason) ), TP_fast_assign( __entry->succeeded = succeeded; __entry->failed = failed; __entry->thp_succeeded = thp_succeeded; __entry->thp_failed = thp_failed; __entry->thp_split = thp_split; __entry->mode = mode; __entry->reason = reason; ), TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s", __entry->succeeded, __entry->failed, __entry->thp_succeeded, __entry->thp_failed, __entry->thp_split, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_IO_URING_H #define _LINUX_IO_URING_H #include <linux/sched.h> #include <linux/xarray.h> struct io_identity { struct files_struct *files; struct mm_struct *mm; #ifdef CONFIG_BLK_CGROUP struct cgroup_subsys_state *blkcg_css; #endif const struct cred *creds; struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; #ifdef CONFIG_AUDIT kuid_t loginuid; unsigned int sessionid; #endif refcount_t count; }; struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; struct file *last; struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; atomic_t in_idle; bool sqpoll; }; #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); void __io_uring_task_cancel(void); void __io_uring_files_cancel(struct files_struct *files); void __io_uring_free(struct task_struct *tsk); static inline void io_uring_task_cancel(void) { if (current->io_uring && !xa_empty(&current->io_uring->xa)) __io_uring_task_cancel(); } static inline void io_uring_files_cancel(struct files_struct *files) { if (current->io_uring && !xa_empty(&current->io_uring->xa)) __io_uring_files_cancel(files); } static inline void io_uring_free(struct task_struct *tsk) { if (tsk->io_uring) __io_uring_free(tsk); } #else static inline struct sock *io_uring_get_socket(struct file *file) { return NULL; } static inline void io_uring_task_cancel(void) { } static inline void io_uring_files_cancel(struct files_struct *files) { } static inline void io_uring_free(struct task_struct *tsk) { } #endif #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTRACE_H #define _ASM_X86_PTRACE_H #include <asm/segment.h> #include <asm/page_types.h> #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLY__ #ifdef __i386__ struct pt_regs { /* * NB: 32-bit x86 CPUs are inconsistent as what happens in the * following cases (where %seg represents a segment register): * * - pushl %seg: some do a 16-bit write and leave the high * bits alone * - movl %seg, [mem]: some do a 16-bit write despite the movl * - IDT entry: some (e.g. 486) will leave the high bits of CS * and (if applicable) SS undefined. * * Fortunately, x86-32 doesn't read the high bits on POP or IRET, * so we can just treat all of the segment registers as 16-bit * values. */ unsigned long bx; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; unsigned long bp; unsigned long ax; unsigned short ds; unsigned short __dsh; unsigned short es; unsigned short __esh; unsigned short fs; unsigned short __fsh; /* On interrupt, gs and __gsh store the vector number. */ unsigned short gs; unsigned short __gsh; /* On interrupt, this is the error code. */ unsigned long orig_ax; unsigned long ip; unsigned short cs; unsigned short __csh; unsigned long flags; unsigned long sp; unsigned short ss; unsigned short __ssh; }; #else /* __i386__ */ struct pt_regs { /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; /* * On syscall entry, this is syscall#. On CPU exception, this is error code. * On hw interrupt, it's IRQ number: */ unsigned long orig_ax; /* Return frame for iretq */ unsigned long ip; unsigned long cs; unsigned long flags; unsigned long sp; unsigned long ss; /* top of stack page */ }; #endif /* !__i386__ */ #ifdef CONFIG_PARAVIRT #include <asm/paravirt_types.h> #endif #include <asm/proto.h> struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the * register set was from protected mode with RPL-3 CS value. This * tricky test checks that with one comparison. * * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. */ static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else return !!(regs->cs & 3); #endif } static inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); #else return 0; /* No V86 mode support in long mode */ #endif } static inline bool user_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 #ifndef CONFIG_PARAVIRT_XXL /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. */ return regs->cs == __USER_CS; #else /* Headers are too twisted for this to go in paravirt.h. */ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif #else /* !CONFIG_X86_64 */ return false; #endif } /* * Determine whether the register set came from any context that is running in * 64-bit mode. */ static inline bool any_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 return !user_mode(regs) || user_64bit_mode(regs); #else return false; #endif } #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp static inline bool ip_within_syscall_gap(struct pt_regs *regs) { bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); #ifdef CONFIG_IA32_EMULATION ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); #endif return ret; } #endif static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } static inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->ip; } static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->ip = val; } static inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->bp; } static inline unsigned long user_stack_pointer(struct pt_regs *regs) { return regs->sp; } static inline void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) { regs->sp = val; } static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->flags & X86_EFLAGS_IF); } /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); extern const char *regs_query_register_name(unsigned int offset); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * * regs_get_register returns the value of a register. The @offset is the * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) { if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || offset == offsetof(struct pt_regs, ds) || offset == offsetof(struct pt_regs, es) || offset == offsetof(struct pt_regs, fs) || offset == offsetof(struct pt_regs, gs)) { return *(u16 *)((unsigned long)regs + offset); } #endif return *(unsigned long *)((unsigned long)regs + offset); } /** * regs_within_kernel_stack() - check the address in the stack * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns the address of the @n th entry of the * kernel stack which is specified by @regs. If the @n th entry is NOT in * the kernel stack, this returns NULL. */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) return addr; else return NULL; } /* To avoid include hell, we can't include uaccess.h */ extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { unsigned long *addr; unsigned long val; long ret; addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } return 0; } /** * regs_get_kernel_argument() - get Nth function argument in kernel * @regs: pt_regs of that context * @n: function argument number (start from 0) * * regs_get_argument() returns @n th argument of the function call. * Note that this chooses most probably assignment, in some case * it can be incorrect. * This is expected to be called from kprobes or ftrace with regs * where the top of stack is the return address. */ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n) { static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), offsetof(struct pt_regs, si), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), offsetof(struct pt_regs, r8), offsetof(struct pt_regs, r9), #define NR_REG_ARGUMENTS 6 #endif }; if (n >= NR_REG_ARGUMENTS) { n -= NR_REG_ARGUMENTS - 1; return regs_get_kernel_stack_nth(regs, n); } else return regs_get_register(regs, argument_offs[n]); } #define arch_has_single_step() (1) #ifdef CONFIG_X86_DEBUGCTLMSR #define arch_has_block_step() (1) #else #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); #ifdef CONFIG_X86_64 # define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) #else # define do_set_thread_area_64(p, s, t) (0) #endif #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PTRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 /* SPDX-License-Identifier: GPL-2.0 */ /* * Linux Socket Filter Data Structures */ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ #include <stdarg.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/compat.h> #include <linux/skbuff.h> #include <linux/linkage.h> #include <linux/printk.h> #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/capability.h> #include <linux/set_memory.h> #include <linux/kallsyms.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/sockptr.h> #include <crypto/sha.h> #include <net/sch_generic.h> #include <asm/byteorder.h> #include <uapi/linux/filter.h> #include <uapi/linux/bpf.h> struct sk_buff; struct sock; struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; struct ctl_table; struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. */ #define BPF_REG_ARG1 BPF_REG_1 #define BPF_REG_ARG2 BPF_REG_2 #define BPF_REG_ARG3 BPF_REG_3 #define BPF_REG_ARG4 BPF_REG_4 #define BPF_REG_ARG5 BPF_REG_5 #define BPF_REG_CTX BPF_REG_6 #define BPF_REG_FP BPF_REG_10 /* Additional register mappings for converted user programs. */ #define BPF_REG_A BPF_REG_0 #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_2 /* scratch reg */ #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ /* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 /* unused opcode to mark special load instruction. Same as BPF_ABS */ #define BPF_PROBE_MEM 0x20 /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 /* unused opcode to mark speculation barrier for mitigating * Speculative Store Bypass */ #define BPF_NOSPEC 0xc0 /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. */ #define BPF_SYM_ELF_TYPE 't' /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_ALU32_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ #define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ #define BPF_ENDIAN(TYPE, DST, LEN) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = LEN }) /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = DST, \ .off = 0, \ .imm = 1 }) static inline bool insn_is_zext(const struct bpf_insn *insn) { return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; } /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_DW | BPF_IMM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = (__u32) (IMM) }), \ ((struct bpf_insn) { \ .code = 0, /* zero is reserved opcode */ \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ #define BPF_LD_ABS(SIZE, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ #define BPF_LD_IND(SIZE, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ .dst_reg = 0, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) {