1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 /* SPDX-License-Identifier: GPL-2.0 */ /* * A security context is a set of security attributes * associated with each subject and object controlled * by the security policy. Security contexts are * externally represented as variable-length strings * that can be interpreted by a user or application * with an understanding of the security policy. * Internally, the security server uses a simple * structure. This structure is private to the * security server and can be changed without affecting * clients of the security server. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ #ifndef _SS_CONTEXT_H_ #define _SS_CONTEXT_H_ #include "ebitmap.h" #include "mls_types.h" #include "security.h" /* * A security context consists of an authenticated user * identity, a role, a type and a MLS range. */ struct context { u32 user; u32 role; u32 type; u32 len; /* length of string in bytes */ struct mls_range range; char *str; /* string representation if context cannot be mapped. */ }; static inline void mls_context_init(struct context *c) { memset(&c->range, 0, sizeof(c->range)); } static inline int mls_context_cpy(struct context *dst, struct context *src) { int rc; dst->range.level[0].sens = src->range.level[0].sens; rc = ebitmap_cpy(&dst->range.level[0].cat, &src->range.level[0].cat); if (rc) goto out; dst->range.level[1].sens = src->range.level[1].sens; rc = ebitmap_cpy(&dst->range.level[1].cat, &src->range.level[1].cat); if (rc) ebitmap_destroy(&dst->range.level[0].cat); out: return rc; } /* * Sets both levels in the MLS range of 'dst' to the low level of 'src'. */ static inline int mls_context_cpy_low(struct context *dst, struct context *src) { int rc; dst->range.level[0].sens = src->range.level[0].sens; rc = ebitmap_cpy(&dst->range.level[0].cat, &src->range.level[0].cat); if (rc) goto out; dst->range.level[1].sens = src->range.level[0].sens; rc = ebitmap_cpy(&dst->range.level[1].cat, &src->range.level[0].cat); if (rc) ebitmap_destroy(&dst->range.level[0].cat); out: return rc; } /* * Sets both levels in the MLS range of 'dst' to the high level of 'src'. */ static inline int mls_context_cpy_high(struct context *dst, struct context *src) { int rc; dst->range.level[0].sens = src->range.level[1].sens; rc = ebitmap_cpy(&dst->range.level[0].cat, &src->range.level[1].cat); if (rc) goto out; dst->range.level[1].sens = src->range.level[1].sens; rc = ebitmap_cpy(&dst->range.level[1].cat, &src->range.level[1].cat); if (rc) ebitmap_destroy(&dst->range.level[0].cat); out: return rc; } static inline int mls_context_glblub(struct context *dst, struct context *c1, struct context *c2) { struct mls_range *dr = &dst->range, *r1 = &c1->range, *r2 = &c2->range; int rc = 0; if (r1->level[1].sens < r2->level[0].sens || r2->level[1].sens < r1->level[0].sens) /* These ranges have no common sensitivities */ return -EINVAL; /* Take the greatest of the low */ dr->level[0].sens = max(r1->level[0].sens, r2->level[0].sens); /* Take the least of the high */ dr->level[1].sens = min(r1->level[1].sens, r2->level[1].sens); rc = ebitmap_and(&dr->level[0].cat, &r1->level[0].cat, &r2->level[0].cat); if (rc) goto out; rc = ebitmap_and(&dr->level[1].cat, &r1->level[1].cat, &r2->level[1].cat); if (rc) goto out; out: return rc; } static inline int mls_context_cmp(struct context *c1, struct context *c2) { return ((c1->range.level[0].sens == c2->range.level[0].sens) && ebitmap_cmp(&c1->range.level[0].cat, &c2->range.level[0].cat) && (c1->range.level[1].sens == c2->range.level[1].sens) && ebitmap_cmp(&c1->range.level[1].cat, &c2->range.level[1].cat)); } static inline void mls_context_destroy(struct context *c) { ebitmap_destroy(&c->range.level[0].cat); ebitmap_destroy(&c->range.level[1].cat); mls_context_init(c); } static inline void context_init(struct context *c) { memset(c, 0, sizeof(*c)); } static inline int context_cpy(struct context *dst, struct context *src) { int rc; dst->user = src->user; dst->role = src->role; dst->type = src->type; if (src->str) { dst->str = kstrdup(src->str, GFP_ATOMIC); if (!dst->str) return -ENOMEM; dst->len = src->len; } else { dst->str = NULL; dst->len = 0; } rc = mls_context_cpy(dst, src); if (rc) { kfree(dst->str); return rc; } return 0; } static inline void context_destroy(struct context *c) { c->user = c->role = c->type = 0; kfree(c->str); c->str = NULL; c->len = 0; mls_context_destroy(c); } static inline int context_cmp(struct context *c1, struct context *c2) { if (c1->len && c2->len) return (c1->len == c2->len && !strcmp(c1->str, c2->str)); if (c1->len || c2->len) return 0; return ((c1->user == c2->user) && (c1->role == c2->role) && (c1->type == c2->type) && mls_context_cmp(c1, c2)); } u32 context_compute_hash(const struct context *c); #endif /* _SS_CONTEXT_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_IO_URING_H #define _LINUX_IO_URING_H #include <linux/sched.h> #include <linux/xarray.h> struct io_identity { struct files_struct *files; struct mm_struct *mm; #ifdef CONFIG_BLK_CGROUP struct cgroup_subsys_state *blkcg_css; #endif const struct cred *creds; struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; #ifdef CONFIG_AUDIT kuid_t loginuid; unsigned int sessionid; #endif refcount_t count; }; struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; struct file *last; struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; atomic_t in_idle; bool sqpoll; }; #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); void __io_uring_task_cancel(void); void __io_uring_files_cancel(struct files_struct *files); void __io_uring_free(struct task_struct *tsk); static inline void io_uring_task_cancel(void) { if (current->io_uring && !xa_empty(&current->io_uring->xa)) __io_uring_task_cancel(); } static inline void io_uring_files_cancel(struct files_struct *files) { if (current->io_uring && !xa_empty(&current->io_uring->xa)) __io_uring_files_cancel(files); } static inline void io_uring_free(struct task_struct *tsk) { if (tsk->io_uring) __io_uring_free(tsk); } #else static inline struct sock *io_uring_get_socket(struct file *file) { return NULL; } static inline void io_uring_task_cancel(void) { } static inline void io_uring_files_cancel(struct files_struct *files) { } static inline void io_uring_free(struct task_struct *tsk) { } #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ #define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */ #define PIPE_BUF_FLAG_WHOLE 0x20 /* read() must return entire buffer or error */ #ifdef CONFIG_WATCH_QUEUE #define PIPE_BUF_FLAG_LOSS 0x40 /* Message loss happened after this buffer */ #endif /** * struct pipe_buffer - a linux kernel pipe buffer * @page: the page containing the data for the pipe buffer * @offset: offset of data inside the @page * @len: length of data inside the @page * @ops: operations associated with this buffer. See @pipe_buf_operations. * @flags: pipe buffer flags. See above. * @private: private data owned by the ops. **/ struct pipe_buffer { struct page *page; unsigned int offset, len; const struct pipe_buf_operations *ops; unsigned int flags; unsigned long private; }; /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing * @rd_wait: reader wait point in case of empty pipe * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter * @poll_usage: is this pipe used for epoll, which has crazy wakeups? * @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers * @user: the user who created this pipe * @watch_queue: If this pipe is a watch_queue, this is the stuff for that **/ struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; unsigned int head; unsigned int tail; unsigned int max_usage; unsigned int ring_size; #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif unsigned int nr_accounted; unsigned int readers; unsigned int writers; unsigned int files; unsigned int r_counter; unsigned int w_counter; unsigned int poll_usage; struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; struct user_struct *user; #ifdef CONFIG_WATCH_QUEUE struct watch_queue *watch_queue; #endif }; /* * Note on the nesting of these functions: * * ->confirm() * ->try_steal() * * That is, ->try_steal() must be called on a confirmed buffer. See below for * the meaning of each operation. Also see the kerneldoc in fs/pipe.c for the * pipe and generic variants of these hooks. */ struct pipe_buf_operations { /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong * to a file system, we may need to wait for IO completion in this * hook. Returns 0 for good, or a negative error value in case of * error. If not present all pages are considered good. */ int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *); /* * When the contents of this pipe buffer has been completely * consumed by a reader, ->release() is called. */ void (*release)(struct pipe_inode_info *, struct pipe_buffer *); /* * Attempt to take ownership of the pipe buffer and its contents. * ->try_steal() returns %true for success, in which case the contents * of the pipe (the buf->page) is locked and now completely owned by the * caller. The page may then be transferred to a different mapping, the * most often used case is insertion into different file address space * cache. */ bool (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *); /* * Get a reference to the pipe buffer. */ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline bool pipe_empty(unsigned int head, unsigned int tail) { return head == tail; } /** * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { return head - tail; } /** * pipe_full - Return true if the pipe is full * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer * @limit: The maximum amount of slots available. */ static inline bool pipe_full(unsigned int head, unsigned int tail, unsigned int limit) { return pipe_occupancy(head, tail) >= limit; } /** * pipe_space_for_user - Return number of slots available to userspace * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer * @pipe: The pipe info structure */ static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, struct pipe_inode_info *pipe) { unsigned int p_occupancy, p_space; p_occupancy = pipe_occupancy(head, tail); if (p_occupancy >= pipe->max_usage) return 0; p_space = pipe->ring_size - p_occupancy; if (p_space > pipe->max_usage) p_space = pipe->max_usage; return p_space; } /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * * Return: %true if the reference was successfully obtained. */ static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return buf->ops->get(pipe, buf); } /** * pipe_buf_release - put a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to put a reference to */ static inline void pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { const struct pipe_buf_operations *ops = buf->ops; buf->ops = NULL; ops->release(pipe, buf); } /** * pipe_buf_confirm - verify contents of the pipe buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to confirm */ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->confirm) return 0; return buf->ops->confirm(pipe, buf); } /** * pipe_buf_try_steal - attempt to take ownership of a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal */ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->try_steal) return false; return buf->ops->try_steal(pipe, buf); } /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE /* Pipe lock and unlock operations */ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); extern unsigned int pipe_max_size; extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; /* Wait for a pipe to be readable/writable while dropping the pipe lock */ void pipe_wait_readable(struct pipe_inode_info *); void pipe_wait_writable(struct pipe_inode_info *); struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); bool generic_pipe_buf_try_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; #ifdef CONFIG_WATCH_QUEUE unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); #endif /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ #ifdef CONFIG_WATCH_QUEUE int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); #endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); unsigned int round_pipe_size(unsigned long size); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NDISC_H #define _NDISC_H #include <net/ipv6_stubs.h> /* * ICMP codes for neighbour discovery messages */ #define NDISC_ROUTER_SOLICITATION 133 #define NDISC_ROUTER_ADVERTISEMENT 134 #define NDISC_NEIGHBOUR_SOLICITATION 135 #define NDISC_NEIGHBOUR_ADVERTISEMENT 136 #define NDISC_REDIRECT 137 /* * Router type: cross-layer information from link-layer to * IPv6 layer reported by certain link types (e.g., RFC4214). */ #define NDISC_NODETYPE_UNSPEC 0 /* unspecified (default) */ #define NDISC_NODETYPE_HOST 1 /* host or unauthorized router */ #define NDISC_NODETYPE_NODEFAULT 2 /* non-default router */ #define NDISC_NODETYPE_DEFAULT 3 /* default router */ /* * ndisc options */ enum { __ND_OPT_PREFIX_INFO_END = 0, ND_OPT_SOURCE_LL_ADDR = 1, /* RFC2461 */ ND_OPT_TARGET_LL_ADDR = 2, /* RFC2461 */ ND_OPT_PREFIX_INFO = 3, /* RFC2461 */ ND_OPT_REDIRECT_HDR = 4, /* RFC2461 */ ND_OPT_MTU = 5, /* RFC2461 */ ND_OPT_NONCE = 14, /* RFC7527 */ __ND_OPT_ARRAY_MAX, ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ ND_OPT_PREF64 = 38, /* RFC8781 */ __ND_OPT_MAX }; #define MAX_RTR_SOLICITATION_DELAY HZ #define ND_REACHABLE_TIME (30*HZ) #define ND_RETRANS_TIMER HZ #include <linux/compiler.h> #include <linux/icmpv6.h> #include <linux/in6.h> #include <linux/types.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/hash.h> #include <net/neighbour.h> /* Set to 3 to get tracing... */ #define ND_DEBUG 1 #define ND_PRINTK(val, level, fmt, ...) \ do { \ if (val <= ND_DEBUG) \ net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ } while (0) struct ctl_table; struct inet6_dev; struct net_device; struct net_proto_family; struct sk_buff; struct prefix_info; extern struct neigh_table nd_tbl; struct nd_msg { struct icmp6hdr icmph; struct in6_addr target; __u8 opt[]; }; struct rs_msg { struct icmp6hdr icmph; __u8 opt[]; }; struct ra_msg { struct icmp6hdr icmph; __be32 reachable_time; __be32 retrans_timer; }; struct rd_msg { struct icmp6hdr icmph; struct in6_addr target; struct in6_addr dest; __u8 opt[]; }; struct nd_opt_hdr { __u8 nd_opt_type; __u8 nd_opt_len; } __packed; /* ND options */ struct ndisc_options { struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; #ifdef CONFIG_IPV6_ROUTE_INFO struct nd_opt_hdr *nd_opts_ri; struct nd_opt_hdr *nd_opts_ri_end; #endif struct nd_opt_hdr *nd_useropts; struct nd_opt_hdr *nd_useropts_end; #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1]; #endif }; #define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] #define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] #define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] #define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] #define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] #define nd_opts_mtu nd_opt_array[ND_OPT_MTU] #define nd_opts_nonce nd_opt_array[ND_OPT_NONCE] #define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] #define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts); void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, int data_len, int pad); #define NDISC_OPS_REDIRECT_DATA_SPACE 2 /* * This structure defines the hooks for IPv6 neighbour discovery. * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * * int (*is_useropt)(u8 nd_opt_type): * This function is called when IPv6 decide RA userspace options. if * this function returns 1 then the option given by nd_opt_type will * be handled as userspace option additional to the IPv6 options. * * int (*parse_options)(const struct net_device *dev, * struct nd_opt_hdr *nd_opt, * struct ndisc_options *ndopts): * This function is called while parsing ndisc ops and put each position * as pointer into ndopts. If this function return unequal 0, then this * function took care about the ndisc option, if 0 then the IPv6 ndisc * option parser will take care about that option. * * void (*update)(const struct net_device *dev, struct neighbour *n, * u32 flags, u8 icmp6_type, * const struct ndisc_options *ndopts): * This function is called when IPv6 ndisc updates the neighbour cache * entry. Additional options which can be updated may be previously * parsed by parse_opts callback and accessible over ndopts parameter. * * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, * struct neighbour *neigh, u8 *ha_buf, * u8 **ha): * This function is called when the necessary option space will be * calculated before allocating a skb. The parameters neigh, ha_buf * abd ha are available on NDISC_REDIRECT messages only. * * void (*fill_addr_option)(const struct net_device *dev, * struct sk_buff *skb, u8 icmp6_type, * const u8 *ha): * This function is called when the skb will finally fill the option * fields inside skb. NOTE: this callback should fill the option * fields to the skb which are previously indicated by opt_space * parameter. That means the decision to add such option should * not lost between these two callbacks, e.g. protected by interface * up state. * * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, * const struct prefix_info *pinfo, * struct inet6_dev *in6_dev, * struct in6_addr *addr, * int addr_type, u32 addr_flags, * bool sllao, bool tokenized, * __u32 valid_lft, u32 prefered_lft, * bool dev_addr_generated): * This function is called when a RA messages is received with valid * PIO option fields and an IPv6 address will be added to the interface * for autoconfiguration. The parameter dev_addr_generated reports about * if the address was based on dev->dev_addr or not. This can be used * to add a second address if link-layer operates with two link layer * addresses. E.g. 802.15.4 6LoWPAN. */ struct ndisc_ops { int (*is_useropt)(u8 nd_opt_type); int (*parse_options)(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts); void (*update)(const struct net_device *dev, struct neighbour *n, u32 flags, u8 icmp6_type, const struct ndisc_options *ndopts); int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, struct neighbour *neigh, u8 *ha_buf, u8 **ha); void (*fill_addr_option)(const struct net_device *dev, struct sk_buff *skb, u8 icmp6_type, const u8 *ha); void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft, bool dev_addr_generated); }; #if IS_ENABLED(CONFIG_IPV6) static inline int ndisc_ops_is_useropt(const struct net_device *dev, u8 nd_opt_type) { if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) return dev->ndisc_ops->is_useropt(nd_opt_type); else return 0; } static inline int ndisc_ops_parse_options(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts) { if (dev->ndisc_ops && dev->ndisc_ops->parse_options) return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts); else return 0; } static inline void ndisc_ops_update(const struct net_device *dev, struct neighbour *n, u32 flags, u8 icmp6_type, const struct ndisc_options *ndopts) { if (dev->ndisc_ops && dev->ndisc_ops->update) dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts); } static inline int ndisc_ops_opt_addr_space(const struct net_device *dev, u8 icmp6_type) { if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space && icmp6_type != NDISC_REDIRECT) return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL, NULL, NULL); else return 0; } static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev, struct neighbour *neigh, u8 *ha_buf, u8 **ha) { if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space) return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT, neigh, ha_buf, ha); else return 0; } static inline void ndisc_ops_fill_addr_option(const struct net_device *dev, struct sk_buff *skb, u8 icmp6_type) { if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option && icmp6_type != NDISC_REDIRECT) dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL); } static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev, struct sk_buff *skb, const u8 *ha) { if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option) dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha); } static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft, bool dev_addr_generated) { if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr) dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev, addr, addr_type, addr_flags, sllao, tokenized, valid_lft, prefered_lft, dev_addr_generated); } #endif /* * Return the padding between the option length and the start of the * link addr. Currently only IP-over-InfiniBand needs this, although * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may * also need a pad of 2. */ static inline int ndisc_addr_option_pad(unsigned short type) { switch (type) { case ARPHRD_INFINIBAND: return 2; default: return 0; } } static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) { return NDISC_OPT_SPACE(addr_len + pad); } #if IS_ENABLED(CONFIG_IPV6) static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type) { return __ndisc_opt_addr_space(dev->addr_len, ndisc_addr_option_pad(dev->type)) + ndisc_ops_opt_addr_space(dev, icmp6_type); } static inline int ndisc_redirect_opt_addr_space(struct net_device *dev, struct neighbour *neigh, u8 *ops_data_buf, u8 **ops_data) { return __ndisc_opt_addr_space(dev->addr_len, ndisc_addr_option_pad(dev->type)) + ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf, ops_data); } #endif static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, unsigned char addr_len, int prepad) { u8 *lladdr = (u8 *)(p + 1); int lladdrlen = p->nd_opt_len << 3; if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad)) return NULL; return lladdr + prepad; } static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, struct net_device *dev) { return __ndisc_opt_addr_data(p, dev->addr_len, ndisc_addr_option_pad(dev->type)); } static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; return (((p32[0] ^ hash32_ptr(dev)) * hash_rnd[0]) + (p32[1] * hash_rnd[1]) + (p32[2] * hash_rnd[2]) + (p32[3] * hash_rnd[3])); } static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev, const void *pkey) { return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } static inline struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, const void *pkey) { return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); return n; } static inline void __ipv6_confirm_neigh(struct net_device *dev, const void *pkey) { struct neighbour *n; rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); if (n) { unsigned long now = jiffies; /* avoid dirtying neighbour */ if (READ_ONCE(n->confirmed) != now) WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, const void *pkey) { struct neighbour *n; rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); if (n) { unsigned long now = jiffies; /* avoid dirtying neighbour */ if (READ_ONCE(n->confirmed) != now) WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } /* uses ipv6_stub and is meant for use outside of IPv6 core */ static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, const void *addr) { struct neighbour *neigh; neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); if (unlikely(!neigh)) neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false); return neigh; } int ndisc_init(void); int ndisc_late_init(void); void ndisc_late_cleanup(void); void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir); void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts); /* * IGMP */ int igmp6_init(void); int igmp6_late_init(void); void igmp6_cleanup(void); void igmp6_late_cleanup(void); int igmp6_event_query(struct sk_buff *skb); int igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int ndisc_ifinfo_sysctl_strategy(struct ctl_table *ctl, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); #endif void inet6_ifinfo_notify(int event, struct inet6_dev *idev); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KERNEL_PRINTK_RINGBUFFER_H #define _KERNEL_PRINTK_RINGBUFFER_H #include <linux/atomic.h> #include <linux/dev_printk.h> /* * Meta information about each stored message. * * All fields are set by the printk code except for @seq, which is * set by the ringbuffer code. */ struct printk_info { u64 seq; /* sequence number */ u64 ts_nsec; /* timestamp in nanoseconds */ u16 text_len; /* length of text message */ u8 facility; /* syslog facility */ u8 flags:5; /* internal record flags */ u8 level:3; /* syslog level */ u32 caller_id; /* thread id or processor id */ struct dev_printk_info dev_info; }; /* * A structure providing the buffers, used by writers and readers. * * Writers: * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to * buffers reserved for that writer. * * Readers: * Using prb_rec_init_rd(), a reader sets all fields before calling * prb_read_valid(). Note that the reader provides the @info and @text_buf, * buffers. On success, the struct pointed to by @info will be filled and * the char array pointed to by @text_buf will be filled with text data. */ struct printk_record { struct printk_info *info; char *text_buf; unsigned int text_buf_size; }; /* Specifies the logical position and span of a data block. */ struct prb_data_blk_lpos { unsigned long begin; unsigned long next; }; /* * A descriptor: the complete meta-data for a record. * * @state_var: A bitwise combination of descriptor ID and descriptor state. */ struct prb_desc { atomic_long_t state_var; struct prb_data_blk_lpos text_blk_lpos; }; /* A ringbuffer of "ID + data" elements. */ struct prb_data_ring { unsigned int size_bits; char *data; atomic_long_t head_lpos; atomic_long_t tail_lpos; }; /* A ringbuffer of "struct prb_desc" elements. */ struct prb_desc_ring { unsigned int count_bits; struct prb_desc *descs; struct printk_info *infos; atomic_long_t head_id; atomic_long_t tail_id; }; /* * The high level structure representing the printk ringbuffer. * * @fail: Count of failed prb_reserve() calls where not even a data-less * record was created. */ struct printk_ringbuffer { struct prb_desc_ring desc_ring; struct prb_data_ring text_data_ring; atomic_long_t fail; }; /* * Used by writers as a reserve/commit handle. * * @rb: Ringbuffer where the entry is reserved. * @irqflags: Saved irq flags to restore on entry commit. * @id: ID of the reserved descriptor. * @text_space: Total occupied buffer space in the text data ring, including * ID, alignment padding, and wrapping data blocks. * * This structure is an opaque handle for writers. Its contents are only * to be used by the ringbuffer implementation. */ struct prb_reserved_entry { struct printk_ringbuffer *rb; unsigned long irqflags; unsigned long id; unsigned int text_space; }; /* The possible responses of a descriptor state-query. */ enum desc_state { desc_miss = -1, /* ID mismatch (pseudo state) */ desc_reserved = 0x0, /* reserved, in use by writer */ desc_committed = 0x1, /* committed by writer, could get reopened */ desc_finalized = 0x2, /* committed, no further modification allowed */ desc_reusable = 0x3, /* free, not yet used by any writer */ }; #define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) #define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) #define DESC_SV_BITS (sizeof(unsigned long) * 8) #define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) #define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) #define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) #define DESC_ID_MASK (~DESC_FLAGS_MASK) #define DESC_ID(sv) ((sv) & DESC_ID_MASK) #define FAILED_LPOS 0x1 #define NO_LPOS 0x3 #define FAILED_BLK_LPOS \ { \ .begin = FAILED_LPOS, \ .next = FAILED_LPOS, \ } /* * Descriptor Bootstrap * * The descriptor array is minimally initialized to allow immediate usage * by readers and writers. The requirements that the descriptor array * initialization must satisfy: * * Req1 * The tail must point to an existing (committed or reusable) descriptor. * This is required by the implementation of prb_first_seq(). * * Req2 * Readers must see that the ringbuffer is initially empty. * * Req3 * The first record reserved by a writer is assigned sequence number 0. * * To satisfy Req1, the tail initially points to a descriptor that is * minimally initialized (having no data block, i.e. data-less with the * data block's lpos @begin and @next values set to FAILED_LPOS). * * To satisfy Req2, the initial tail descriptor is initialized to the * reusable state. Readers recognize reusable descriptors as existing * records, but skip over them. * * To satisfy Req3, the last descriptor in the array is used as the initial * head (and tail) descriptor. This allows the first record reserved by a * writer (head + 1) to be the first descriptor in the array. (Only the first * descriptor in the array could have a valid sequence number of 0.) * * The first time a descriptor is reserved, it is assigned a sequence number * with the value of the array index. A "first time reserved" descriptor can * be recognized because it has a sequence number of 0 but does not have an * index of 0. (Only the first descriptor in the array could have a valid * sequence number of 0.) After the first reservation, all future reservations * (recycling) simply involve incrementing the sequence number by the array * count. * * Hack #1 * Only the first descriptor in the array is allowed to have the sequence * number 0. In this case it is not possible to recognize if it is being * reserved the first time (set to index value) or has been reserved * previously (increment by the array count). This is handled by _always_ * incrementing the sequence number by the array count when reserving the * first descriptor in the array. In order to satisfy Req3, the sequence * number of the first descriptor in the array is initialized to minus * the array count. Then, upon the first reservation, it is incremented * to 0, thus satisfying Req3. * * Hack #2 * prb_first_seq() can be called at any time by readers to retrieve the * sequence number of the tail descriptor. However, due to Req2 and Req3, * initially there are no records to report the sequence number of * (sequence numbers are u64 and there is nothing less than 0). To handle * this, the sequence number of the initial tail descriptor is initialized * to 0. Technically this is incorrect, because there is no record with * sequence number 0 (yet) and the tail descriptor is not the first * descriptor in the array. But it allows prb_read_valid() to correctly * report the existence of a record for _any_ given sequence number at all * times. Bootstrapping is complete when the tail is pushed the first * time, thus finally pointing to the first descriptor reserved by a * writer, which has the assigned sequence number 0. */ /* * Initiating Logical Value Overflows * * Both logical position (lpos) and ID values can be mapped to array indexes * but may experience overflows during the lifetime of the system. To ensure * that printk_ringbuffer can handle the overflows for these types, initial * values are chosen that map to the correct initial array indexes, but will * result in overflows soon. * * BLK0_LPOS * The initial @head_lpos and @tail_lpos for data rings. It is at index * 0 and the lpos value is such that it will overflow on the first wrap. * * DESC0_ID * The initial @head_id and @tail_id for the desc ring. It is at the last * index of the descriptor array (see Req3 above) and the ID value is such * that it will overflow on the second wrap. */ #define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) #define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) #define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) /* * Define a ringbuffer with an external text data buffer. The same as * DEFINE_PRINTKRB() but requires specifying an external buffer for the * text data. * * Note: The specified external buffer must be of the size: * 2 ^ (descbits + avgtextbits) */ #define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \ static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ /* the initial head and tail */ \ [_DESCS_COUNT(descbits) - 1] = { \ /* reusable */ \ .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ /* no associated data block */ \ .text_blk_lpos = FAILED_BLK_LPOS, \ }, \ }; \ static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ /* this will be the first record reserved by a writer */ \ [0] = { \ /* will be incremented to 0 on the first reservation */ \ .seq = -(u64)_DESCS_COUNT(descbits), \ }, \ /* the initial head and tail */ \ [_DESCS_COUNT(descbits) - 1] = { \ /* reports the first seq value during the bootstrap phase */ \ .seq = 0, \ }, \ }; \ static struct printk_ringbuffer name = { \ .desc_ring = { \ .count_bits = descbits, \ .descs = &_##name##_descs[0], \ .infos = &_##name##_infos[0], \ .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ }, \ .text_data_ring = { \ .size_bits = (avgtextbits) + (descbits), \ .data = text_buf, \ .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ }, \ .fail = ATOMIC_LONG_INIT(0), \ } /** * DEFINE_PRINTKRB() - Define a ringbuffer. * * @name: The name of the ringbuffer variable. * @descbits: The number of descriptors as a power-of-2 value. * @avgtextbits: The average text data size per record as a power-of-2 value. * * This is a macro for defining a ringbuffer and all internal structures * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a * variant where the text data buffer can be specified externally. */ #define DEFINE_PRINTKRB(name, descbits, avgtextbits) \ static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ __aligned(__alignof__(unsigned long)); \ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** * prb_rec_init_wd() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. */ static inline void prb_rec_init_wr(struct printk_record *r, unsigned int text_buf_size) { r->info = NULL; r->text_buf = NULL; r->text_buf_size = text_buf_size; } bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r); bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size); void prb_commit(struct prb_reserved_entry *e); void prb_final_commit(struct prb_reserved_entry *e); void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int text_buf_size, struct prb_desc *descs, unsigned int descs_count_bits, struct printk_info *infos); unsigned int prb_record_text_space(struct prb_reserved_entry *e); /* Reader Interface */ /** * prb_rec_init_rd() - Initialize a buffer for reading records. * * @r: The record to initialize. * @info: A buffer to store record meta-data. * @text_buf: A buffer to store text data. * @text_buf_size: The size of @text_buf. * * Initialize all the fields that a reader is interested in. All arguments * (except @r) are optional. Only record data for arguments that are * non-NULL or non-zero will be read. */ static inline void prb_rec_init_rd(struct printk_record *r, struct printk_info *info, char *text_buf, unsigned int text_buf_size) { r->info = info; r->text_buf = text_buf; r->text_buf_size = text_buf_size; } /** * prb_for_each_record() - Iterate over the records of a ringbuffer. * * @from: The sequence number to begin with. * @rb: The ringbuffer to iterate over. * @s: A u64 to store the sequence number on each iteration. * @r: A printk_record to store the record on each iteration. * * This is a macro for conveniently iterating over a ringbuffer. * Note that @s may not be the sequence number of the record on each * iteration. For the sequence number, @r->info->seq should be checked. * * Context: Any context. */ #define prb_for_each_record(from, rb, s, r) \ for ((s) = from; prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1) /** * prb_for_each_info() - Iterate over the meta data of a ringbuffer. * * @from: The sequence number to begin with. * @rb: The ringbuffer to iterate over. * @s: A u64 to store the sequence number on each iteration. * @i: A printk_info to store the record meta data on each iteration. * @lc: An unsigned int to store the text line count of each record. * * This is a macro for conveniently iterating over a ringbuffer. * Note that @s may not be the sequence number of the record on each * iteration. For the sequence number, @r->info->seq should be checked. * * Context: Any context. */ #define prb_for_each_info(from, rb, s, i, lc) \ for ((s) = from; prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1) bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r); bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count); u64 prb_first_valid_seq(struct printk_ringbuffer *rb); u64 prb_next_seq(struct printk_ringbuffer *rb); #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Cryptographic API. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_H #define _CRYPTO_INTERNAL_H #include <crypto/algapi.h> #include <linux/completion.h> #include <linux/list.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/numa.h> #include <linux/refcount.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/types.h> struct crypto_instance; struct crypto_template; struct crypto_larval { struct crypto_alg alg; struct crypto_alg *adult; struct completion completion; u32 mask; }; extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; extern struct blocking_notifier_head crypto_chain; #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); void __exit crypto_exit_proc(void); #else static inline void crypto_init_proc(void) { } static inline void crypto_exit_proc(void) { } #endif static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm_node(struct crypto_alg *alg, const struct crypto_type *frontend, int node); static inline void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { return crypto_create_tfm_node(alg, frontend, NUMA_NO_NODE); } struct crypto_alg *crypto_find_alg(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask); void *crypto_alloc_tfm_node(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask, int node); static inline void *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask) { return crypto_alloc_tfm_node(alg_name, frontend, type, mask, NUMA_NO_NODE); } int crypto_probing_notify(unsigned long val, void *v); unsigned int crypto_alg_extsize(struct crypto_alg *alg); int crypto_type_has_alg(const char *name, const struct crypto_type *frontend, u32 type, u32 mask); static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { refcount_inc(&alg->cra_refcnt); return alg; } static inline void crypto_alg_put(struct crypto_alg *alg) { if (refcount_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) alg->cra_destroy(alg); } static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); } static inline void crypto_tmpl_put(struct crypto_template *tmpl) { module_put(tmpl->module); } static inline int crypto_is_larval(struct crypto_alg *alg) { return alg->cra_flags & CRYPTO_ALG_LARVAL; } static inline int crypto_is_dead(struct crypto_alg *alg) { return alg->cra_flags & CRYPTO_ALG_DEAD; } static inline int crypto_is_moribund(struct crypto_alg *alg) { return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING); } static inline void crypto_notify(unsigned long val, void *v) { blocking_notifier_call_chain(&crypto_chain, val, v); } static inline void crypto_yield(u32 flags) { if (flags & CRYPTO_TFM_REQ_MAY_SLEEP) cond_resched(); } #endif /* _CRYPTO_INTERNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BYTEORDER_GENERIC_H #define _LINUX_BYTEORDER_GENERIC_H /* * linux/byteorder/generic.h * Generic Byte-reordering support * * The "... p" macros, like le64_to_cpup, can be used with pointers * to unaligned data, but there will be a performance penalty on * some architectures. Use get_unaligned for unaligned data. * * Francois-Rene Rideau <fare@tunes.org> 19970707 * gathered all the good ideas from all asm-foo/byteorder.h into one file, * cleaned them up. * I hope it is compliant with non-GCC compilers. * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h, * because I wasn't sure it would be ok to put it in types.h * Upgraded it to 2.1.43 * Francois-Rene Rideau <fare@tunes.org> 19971012 * Upgraded it to 2.1.57 * to please Linus T., replaced huge #ifdef's between little/big endian * by nestedly #include'd files. * Francois-Rene Rideau <fare@tunes.org> 19971205 * Made it to 2.1.71; now a facelift: * Put files under include/linux/byteorder/ * Split swab from generic support. * * TODO: * = Regular kernel maintainers could also replace all these manual * byteswap macros that remain, disseminated among drivers, * after some grep or the sources... * = Linus might want to rename all these macros and files to fit his taste, * to fit his personal naming scheme. * = it seems that a few drivers would also appreciate * nybble swapping support... * = every architecture could add their byteswap macro in asm/byteorder.h * see how some architectures already do (i386, alpha, ppc, etc) * = cpu_to_beXX and beXX_to_cpu might some day need to be well * distinguished throughout the kernel. This is not the case currently, * since little endian, big endian, and pdp endian machines needn't it. * But this might be the case for, say, a port of Linux to 20/21 bit * architectures (and F21 Linux addict around?). */ /* * The following macros are to be defined by <asm/byteorder.h>: * * Conversion of long and short int between network and host format * ntohl(__u32 x) * ntohs(__u16 x) * htonl(__u32 x) * htons(__u16 x) * It seems that some programs (which? where? or perhaps a standard? POSIX?) * might like the above to be functions, not macros (why?). * if that's true, then detect them, and take measures. * Anyway, the measure is: define only ___ntohl as a macro instead, * and in a separate file, have * unsigned long inline ntohl(x){return ___ntohl(x);} * * The same for constant arguments * __constant_ntohl(__u32 x) * __constant_ntohs(__u16 x) * __constant_htonl(__u32 x) * __constant_htons(__u16 x) * * Conversion of XX-bit integers (16- 32- or 64-) * between native CPU format and little/big endian format * 64-bit stuff only defined for proper architectures * cpu_to_[bl]eXX(__uXX x) * [bl]eXX_to_cpu(__uXX x) * * The same, but takes a pointer to the value to convert * cpu_to_[bl]eXXp(__uXX x) * [bl]eXX_to_cpup(__uXX x) * * The same, but change in situ * cpu_to_[bl]eXXs(__uXX x) * [bl]eXX_to_cpus(__uXX x) * * See asm-foo/byteorder.h for examples of how to provide * architecture-optimized versions * */ #define cpu_to_le64 __cpu_to_le64 #define le64_to_cpu __le64_to_cpu #define cpu_to_le32 __cpu_to_le32 #define le32_to_cpu __le32_to_cpu #define cpu_to_le16 __cpu_to_le16 #define le16_to_cpu __le16_to_cpu #define cpu_to_be64 __cpu_to_be64 #define be64_to_cpu __be64_to_cpu #define cpu_to_be32 __cpu_to_be32 #define be32_to_cpu __be32_to_cpu #define cpu_to_be16 __cpu_to_be16 #define be16_to_cpu __be16_to_cpu #define cpu_to_le64p __cpu_to_le64p #define le64_to_cpup __le64_to_cpup #define cpu_to_le32p __cpu_to_le32p #define le32_to_cpup __le32_to_cpup #define cpu_to_le16p __cpu_to_le16p #define le16_to_cpup __le16_to_cpup #define cpu_to_be64p __cpu_to_be64p #define be64_to_cpup __be64_to_cpup #define cpu_to_be32p __cpu_to_be32p #define be32_to_cpup __be32_to_cpup #define cpu_to_be16p __cpu_to_be16p #define be16_to_cpup __be16_to_cpup #define cpu_to_le64s __cpu_to_le64s #define le64_to_cpus __le64_to_cpus #define cpu_to_le32s __cpu_to_le32s #define le32_to_cpus __le32_to_cpus #define cpu_to_le16s __cpu_to_le16s #define le16_to_cpus __le16_to_cpus #define cpu_to_be64s __cpu_to_be64s #define be64_to_cpus __be64_to_cpus #define cpu_to_be32s __cpu_to_be32s #define be32_to_cpus __be32_to_cpus #define cpu_to_be16s __cpu_to_be16s #define be16_to_cpus __be16_to_cpus /* * They have to be macros in order to do the constant folding * correctly - if the argument passed into a inline function * it is no longer constant according to gcc.. */ #undef ntohl #undef ntohs #undef htonl #undef htons #define ___htonl(x) __cpu_to_be32(x) #define ___htons(x) __cpu_to_be16(x) #define ___ntohl(x) __be32_to_cpu(x) #define ___ntohs(x) __be16_to_cpu(x) #define htonl(x) ___htonl(x) #define ntohl(x) ___ntohl(x) #define htons(x) ___htons(x) #define ntohs(x) ___ntohs(x) static inline void le16_add_cpu(__le16 *var, u16 val) { *var = cpu_to_le16(le16_to_cpu(*var) + val); } static inline void le32_add_cpu(__le32 *var, u32 val) { *var = cpu_to_le32(le32_to_cpu(*var) + val); } static inline void le64_add_cpu(__le64 *var, u64 val) { *var = cpu_to_le64(le64_to_cpu(*var) + val); } /* XXX: this stuff can be optimized */ static inline void le32_to_cpu_array(u32 *buf, unsigned int words) { while (words--) { __le32_to_cpus(buf); buf++; } } static inline void cpu_to_le32_array(u32 *buf, unsigned int words) { while (words--) { __cpu_to_le32s(buf); buf++; } } static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); } static inline void be32_add_cpu(__be32 *var, u32 val) { *var = cpu_to_be32(be32_to_cpu(*var) + val); } static inline void be64_add_cpu(__be64 *var, u64 val) { *var = cpu_to_be64(be64_to_cpu(*var) + val); } static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = cpu_to_be32(src[i]); } static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = be32_to_cpu(src[i]); } #endif /* _LINUX_BYTEORDER_GENERIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 /* SPDX-License-Identifier: GPL-2.0 */ /* linux/include/linux/clockchips.h * * This file contains the structure definitions for clockchips. * * If you are not a clockchip, or the time of day code, you should * not be including this file! */ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H #ifdef CONFIG_GENERIC_CLOCKEVENTS # include <linux/clocksource.h> # include <linux/cpumask.h> # include <linux/ktime.h> # include <linux/notifier.h> struct clock_event_device; struct module; /* * Possible states of a clock event device. * * DETACHED: Device is not used by clockevents core. Initial state or can be * reached from SHUTDOWN. * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. * PERIODIC: Device is programmed to generate events periodically. Can be * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* * Clock event features */ # define CLOCK_EVT_FEAT_PERIODIC 0x000001 # define CLOCK_EVT_FEAT_ONESHOT 0x000002 # define CLOCK_EVT_FEAT_KTIME 0x000004 /* * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ # define CLOCK_EVT_FEAT_C3STOP 0x000008 # define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ # define CLOCK_EVT_FEAT_DYNIRQ 0x000020 # define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ # define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low * level handler of the event source * @set_next_event: set next event function using a clocksource delta * @set_next_ktime: set next event function using a direct ktime value * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot * @set_state_oneshot_stopped: switch state to oneshot_stopped * @set_state_shutdown: switch state to shutdown * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; unsigned long max_delta_ticks; const char *name; int rating; int irq; int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; } ____cacheline_aligned; /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: * * clock_ticks = (nanoseconds * factor) >> shift. * * div_sc is the rearranged equation to calculate a factor from a given clock * ticks / nanoseconds ratio: * * factor = (clock_ticks << shift) / nanoseconds */ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, int shift) { u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); return (unsigned long) tmp; } /* Clock event layer functions */ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST # ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); # else # define tick_broadcast NULL # endif extern int tick_receive_broadcast(void); # endif # if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); # else static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #endif /* _LINUX_CLOCKCHIPS_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/minmax.h> #include <linux/sched.h> #include <linux/thread_info.h> #include <asm/uaccess.h> #ifdef CONFIG_SET_FS /* * Force the uaccess routines to be wired up for actual userspace access, * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone * using force_uaccess_end below. */ static inline mm_segment_t force_uaccess_begin(void) { mm_segment_t fs = get_fs(); set_fs(USER_DS); return fs; } static inline void force_uaccess_end(mm_segment_t oldfs) { set_fs(oldfs); } #else /* CONFIG_SET_FS */ typedef struct { /* empty dummy */ } mm_segment_t; #ifndef TASK_SIZE_MAX #define TASK_SIZE_MAX TASK_SIZE #endif #define uaccess_kernel() (false) #define user_addr_max() (TASK_SIZE_MAX) static inline mm_segment_t force_uaccess_begin(void) { return (mm_segment_t) { }; } static inline void force_uaccess_end(mm_segment_t oldfs) { } #endif /* CONFIG_SET_FS */ /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and * __copy_{to,from}_user{,_inatomic}(). * * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and * return the amount left to copy. They should assume that access_ok() has * already been checked (and succeeded); they should *not* zero-pad anything. * No KASAN or object size checks either - those belong here. * * Both of these functions should attempt to copy size bytes starting at from * into the area starting at to. They must not fetch or store anything * outside of those areas. Return value must be between 0 (everything * copied successfully) and size (nothing copied). * * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting * at to must become equal to the bytes fetched from the corresponding area * starting at from. All data past to + size - N must be left unmodified. * * If copying succeeds, the return value must be 0. If some data cannot be * fetched, it is permitted to copy less than had been fetched; the only * hard requirement is that not storing anything at all (i.e. returning size) * should happen only when nothing could be copied. In other words, you don't * have to squeeze as much as possible - it is allowed, but not necessary. * * For raw_copy_from_user() to always points to kernel memory and no faults * on store should happen. Interpretation of from is affected by set_fs(). * For raw_copy_to_user() it's the other way round. * * Both can be inlined - it's up to architectures whether it wants to bother * with that. They should not be used directly; they are used to implement * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) * that are used instead. Out of those, __... ones are inlined. Plain * copy_{to,from}_user() might or might not be inlined. If you want them * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. * * NOTE: only copy_from_user() zero-pads the destination in case of short copy. * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything * at all; their callers absolutely must check the return value. * * Biarch ones should also provide raw_copy_in_user() - similar to the above, * but both source and destination are __user pointers (affected by set_fs() * as usual) and both source and destination can trigger faults. */ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } /** * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. * @to: Destination address, in user space. * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * The caller should also make sure he pins the user space address * so that we don't result in page fault and sleep. */ static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } #ifdef INLINE_COPY_FROM_USER static inline __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } #else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); #endif #ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; } #else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); #endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (likely(check_copy_size(to, n, false))) n = _copy_from_user(to, from, n); return n; } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { if (likely(check_copy_size(from, n, true))) n = _copy_to_user(to, from, n); return n; } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); if (access_ok(to, n) && access_ok(from, n)) n = raw_copy_in_user(to, from, n); return n; } #endif #ifndef copy_mc_to_kernel /* * Without arch opt-in this generic copy_mc_to_kernel() will not handle * #MC (or arch equivalent) during source read. */ static inline unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; } #endif static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; } static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; } /* * These routines enable/disable the pagefault handler. If disabled, it will * not take any locks and go straight to the fixup table. * * User access methods will not sleep when called from a pagefault_disabled() * environment. */ static inline void pagefault_disable(void) { pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. */ barrier(); } static inline void pagefault_enable(void) { /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); pagefault_disabled_dec(); } /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ static inline bool pagefault_disabled(void) { return current->pagefault_disabled != 0; } /* * The pagefault handler is in general disabled by pagefault_disable() or * when in irq context (via in_atomic()). * * This function should only be used by the fault handlers. Other users should * stick to pagefault_disabled(). * Please NEVER use preempt_disable() to disable the fault handler. With * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline __must_check unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } #endif /* ARCH_HAS_NOCACHE_UACCESS */ extern __must_check int check_zeroed_user(const void __user *from, size_t size); /** * copy_struct_from_user: copy a struct from userspace * @dst: Destination address, in kernel space. This buffer must be @ksize * bytes long. * @ksize: Size of @dst struct. * @src: Source address, in userspace. * @usize: (Alleged) size of @src struct. * * Copies a struct from userspace to kernel space, in a way that guarantees * backwards-compatibility for struct syscall arguments (as long as future * struct extensions are made such that all new fields are *appended* to the * old struct, and zeroed-out new fields have the same meaning as the old * struct). * * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. * The recommended usage is something like the following: * * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) * { * int err; * struct foo karg = {}; * * if (usize > PAGE_SIZE) * return -E2BIG; * if (usize < FOO_SIZE_VER0) * return -EINVAL; * * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); * if (err) * return err; * * // ... * } * * There are three cases to consider: * * If @usize == @ksize, then it's copied verbatim. * * If @usize < @ksize, then the userspace has passed an old struct to a * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) * are to be zero-filled. * * If @usize > @ksize, then the userspace has passed a new struct to an * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) * are checked to ensure they are zeroed, otherwise -E2BIG is returned. * * Returns (in all cases, some data may have been copied): * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. * * -EFAULT: access to userspace failed. */ static __always_inline __must_check int copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t usize) { size_t size = min(ksize, usize); size_t rest = max(ksize, usize) - size; /* Deal with trailing bytes. */ if (usize < ksize) { memset(dst + size, 0, rest); } else if (usize > ksize) { int ret = check_zeroed_user(src + size, rest); if (ret <= 0) return ret ?: -E2BIG; } /* Copy the interoperable parts of the struct. */ if (copy_from_user(dst, src, size)) return -EFAULT; return 0; } bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); long copy_from_user_nofault(void *dst, const void __user *src, size_t size); long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ #define get_kernel_nofault(val, ptr) ({ \ const typeof(val) *__gk_ptr = (ptr); \ copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ }) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif #ifndef user_write_access_begin #define user_write_access_begin user_access_begin #define user_write_access_end user_access_end #endif #ifndef user_read_access_begin #define user_read_access_begin user_access_begin #define user_read_access_end user_access_end #endif #ifdef CONFIG_HARDENED_USERCOPY void usercopy_warn(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); #endif #endif /* __LINUX_UACCESS_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef INT_BLK_MQ_H #define INT_BLK_MQ_H #include "blk-stat.h" #include "blk-mq-tag.h" struct blk_mq_tag_set; struct blk_mq_ctxs { struct kobject kobj; struct blk_mq_ctx __percpu *queue_ctx; }; /** * struct blk_mq_ctx - State for a software queue facing the submitting CPUs */ struct blk_mq_ctx { struct { spinlock_t lock; struct list_head rq_lists[HCTX_MAX_TYPES]; } ____cacheline_aligned_in_smp; unsigned int cpu; unsigned short index_hw[HCTX_MAX_TYPES]; struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; /* incremented at dispatch time */ unsigned long rq_dispatched[2]; unsigned long rq_merged; /* incremented at completion time */ unsigned long ____cacheline_aligned_in_smp rq_completed[2]; struct request_queue *queue; struct blk_mq_ctxs *ctxs; struct kobject kobj; } ____cacheline_aligned_in_smp; void blk_mq_exit_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *, unsigned int); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start); void blk_mq_put_rq_ref(struct request *rq); /* * Internal helpers for allocating/freeing the request map */ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx); void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags); struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int nr_tags, unsigned int reserved_tags, unsigned int flags); int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx, unsigned int depth); /* * Internal helpers for request insertion into sw queues */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); void blk_mq_request_bypass_insert(struct request *rq, bool at_head, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); /* Used by blk_insert_cloned_request() to issue request directly */ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last); void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); /* * CPU -> queue mappings */ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); /* * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue * @q: request queue * @type: the hctx type index * @cpu: CPU */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, enum hctx_type type, unsigned int cpu) { return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; } /* * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue * @q: request queue * @flags: request command flags * @cpu: cpu ctx */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, unsigned int flags, struct blk_mq_ctx *ctx) { enum hctx_type type = HCTX_TYPE_DEFAULT; /* * The caller ensure that if REQ_HIPRI, poll must be enabled. */ if (flags & REQ_HIPRI) type = HCTX_TYPE_POLL; else if ((flags & REQ_OP_MASK) == REQ_OP_READ) type = HCTX_TYPE_READ; return ctx->hctxs[type]; } /* * sysfs helpers */ extern void blk_mq_sysfs_init(struct request_queue *q); extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); void blk_mq_release(struct request_queue *q); static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, unsigned int cpu) { return per_cpu_ptr(q->queue_ctx, cpu); } /* * This assumes per-cpu software queueing queues. They could be per-node * as well, for instance. For now this is hardcoded as-is. Note that we don't * care about preemption, since we know the ctx's are persistent. This does * mean that we can't rely on ctx always matching the currently running CPU. */ static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) { return __blk_mq_get_ctx(q, raw_smp_processor_id()); } struct blk_mq_alloc_data { /* input parameter */ struct request_queue *q; blk_mq_req_flags_t flags; unsigned int shallow_depth; unsigned int cmd_flags; /* input & output parameter */ struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; }; static inline bool blk_mq_is_sbitmap_shared(unsigned int flags) { return flags & BLK_MQ_F_TAG_HCTX_SHARED; } static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->q->elevator) return data->hctx->sched_tags; return data->hctx->tags; } static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_STOPPED, &hctx->state); } static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) { return hctx->nr_ctx && hctx->tags; } unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); static inline void blk_mq_put_dispatch_budget(struct request_queue *q) { if (q->mq_ops->put_budget) q->mq_ops->put_budget(q); } static inline bool blk_mq_get_dispatch_budget(struct request_queue *q) { if (q->mq_ops->get_budget) return q->mq_ops->get_budget(q); return true; } static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) { if (blk_mq_is_sbitmap_shared(hctx->flags)) atomic_inc(&hctx->queue->nr_active_requests_shared_sbitmap); else atomic_inc(&hctx->nr_active); } static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) { if (blk_mq_is_sbitmap_shared(hctx->flags)) atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap); else atomic_dec(&hctx->nr_active); } static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) { if (blk_mq_is_sbitmap_shared(hctx->flags)) return atomic_read(&hctx->queue->nr_active_requests_shared_sbitmap); return atomic_read(&hctx->nr_active); } static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = BLK_MQ_NO_TAG; if (rq->rq_flags & RQF_MQ_INFLIGHT) { rq->rq_flags &= ~RQF_MQ_INFLIGHT; __blk_mq_dec_active_requests(hctx); } } static inline void blk_mq_put_driver_tag(struct request *rq) { if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG) return; __blk_mq_put_driver_tag(rq->mq_hctx, rq); } static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) { int cpu; for_each_possible_cpu(cpu) qmap->mq_map[cpu] = 0; } /* * blk_mq_plug() - Get caller context plug * @q: request queue * @bio : the bio being submitted by the caller context * * Plugging, by design, may delay the insertion of BIOs into the elevator in * order to increase BIO merging opportunities. This however can cause BIO * insertion order to change from the order in which submit_bio() is being * executed in the case of multiple contexts concurrently issuing BIOs to a * device, even if these context are synchronized to tightly control BIO issuing * order. While this is not a problem with regular block devices, this ordering * change can cause write BIO failures with zoned block devices as these * require sequential write patterns to zones. Prevent this from happening by * ignoring the plug state of a BIO issuing context if the target request queue * is for a zoned block device and the BIO to plug is a write operation. * * Return current->plug if the bio can be plugged and NULL otherwise */ static inline struct blk_plug *blk_mq_plug(struct request_queue *q, struct bio *bio) { /* * For regular block devices or read operations, use the context plug * which may be NULL if blk_start_plug() was not executed. */ if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio))) return current->plug; /* Zoned block device write operation case: do not plug the BIO */ return NULL; } /* * For shared tag users, we track the number of currently active users * and attempt to provide a fair share of the tag depth for each of them. */ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) { unsigned int depth, users; if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; /* * Don't try dividing an ant */ if (bt->sb.depth == 1) return true; if (blk_mq_is_sbitmap_shared(hctx->flags)) { struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return true; users = atomic_read(&set->active_queues_shared_sbitmap); } else { if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return true; users = atomic_read(&hctx->tags->active_queues); } if (!users) return true; /* * Allow at least some tags */ depth = max((bt->sb.depth + users - 1) / users, 4U); return __blk_mq_active_requests(hctx) < depth; } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov */ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/xarray.h> #include <linux/local_lock.h> /* Keep unconverted code working */ #define radix_tree_root xarray #define radix_tree_node xa_node struct radix_tree_preload { local_lock_t lock; unsigned nr; /* nodes->parent points to next preallocated node */ struct radix_tree_node *nodes; }; DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads); /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: * * 00 - data pointer * 10 - internal entry * x1 - value entry * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved * to another location in the tree and the lookup should be restarted. While * NULL fits the 'data pointer' pattern, it means that there is no entry in * the tree for this index (no matter what level of the tree it is found at). * This means that storing a NULL entry in the tree is the same as deleting * the entry from the tree. */ #define RADIX_TREE_ENTRY_MASK 3UL #define RADIX_TREE_INTERNAL_NODE 2UL static inline bool radix_tree_is_internal_node(void *ptr) { return ((unsigned long)ptr & RADIX_TREE_ENTRY_MASK) == RADIX_TREE_INTERNAL_NODE; } /*** radix-tree API starts here ***/ #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) #define RADIX_TREE_MAX_TAGS XA_MAX_MARKS #define RADIX_TREE_TAG_LONGS XA_MARK_LONGS #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) /* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) /* The top bits of xa_flags are used to store the root tags */ #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) #define RADIX_TREE_INIT(name, mask) XARRAY_INIT(name, mask) #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(name, mask) #define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask) static inline bool radix_tree_empty(const struct radix_tree_root *root) { return root->xa_head == NULL; } /** * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating * @node: node that contains current slot * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is * described by a pointer to its first slot and a struct radix_tree_iter * which holds the chunk's position in the tree and its size. For tagged * iteration radix_tree_iter also holds the slots' bit-mask for one chosen * radix tree tag. */ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; struct radix_tree_node *node; }; /** * Radix-tree synchronization * * The radix-tree API requires that users provide all synchronisation (with * specific exceptions, noted below). * * Synchronization of access to the data items being stored in the tree, and * management of their lifetimes must be completely managed by API users. * * For API usage, in general, * - any function _modifying_ the tree or tags (inserting or deleting * items, setting or clearing tags) must exclude other modifications, and * exclude any functions reading the tree. * - any function _reading_ the tree or tags (looking up items or tags, * gang lookups) must exclude modifications to the tree, but may occur * concurrently with other readers. * * The notable exceptions to this rule are the following functions: * __radix_tree_lookup * radix_tree_lookup * radix_tree_lookup_slot * radix_tree_tag_get * radix_tree_gang_lookup * radix_tree_gang_lookup_tag * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * * The first 7 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. * * It is still required that the caller manage the synchronization and lifetimes * of the items. So if RCU lock-free lookups are used, typically this would mean * that the items have their own locks, or are amenable to lock-free access; and * that the items are freed by RCU (or only freed after having been deleted from * the radix tree *and* a synchronize_rcu() grace period). * * (Note, rcu_assign_pointer and rcu_dereference are not needed to control * access to data items when inserting into or looking up from the radix tree) * * Note that the value returned by radix_tree_tag_get() may not be relied upon * if only the RCU read lock is held. Functions to set/clear tags and to * delete nodes running concurrently with it may affect its result such that * two consecutive reads in the same locked section may return different * values. If reliability is required, modification functions must also be * excluded from concurrency. * * radix_tree_tagged is able to be called without locking or RCU. */ /** * radix_tree_deref_slot - dereference a slot * @slot: slot pointer, returned by radix_tree_lookup_slot * * For use with radix_tree_lookup_slot(). Caller must hold tree at least read * locked across slot lookup and dereference. Not required if write lock is * held (ie. items cannot be concurrently inserted). * * radix_tree_deref_retry must be used to confirm validity of the pointer if * only the read lock is held. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot(void __rcu **slot) { return rcu_dereference(*slot); } /** * radix_tree_deref_slot_protected - dereference a slot with tree lock held * @slot: slot pointer, returned by radix_tree_lookup_slot * * Similar to radix_tree_deref_slot. The caller does not hold the RCU read * lock but it must hold the tree lock to prevent parallel updates. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot_protected(void __rcu **slot, spinlock_t *treelock) { return rcu_dereference_protected(*slot, lockdep_is_held(treelock)); } /** * radix_tree_deref_retry - check radix_tree_deref_slot * @arg: pointer returned by radix_tree_deref_slot * Returns: 0 if retry is not required, otherwise retry is required * * radix_tree_deref_retry must be used with radix_tree_deref_slot. */ static inline int radix_tree_deref_retry(void *arg) { return unlikely(radix_tree_is_internal_node(arg)); } /** * radix_tree_exception - radix_tree_deref_slot returned either exception? * @arg: value returned by radix_tree_deref_slot * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. */ static inline int radix_tree_exception(void *arg) { return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } int radix_tree_insert(struct radix_tree_root *, unsigned long index, void *); void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long index); void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, void __rcu **slot, void *entry); void radix_tree_iter_replace(struct radix_tree_root *, const struct radix_tree_iter *, void __rcu **slot, void *entry); void radix_tree_replace_slot(struct radix_tree_root *, void __rcu **slot, void *entry); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max); enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */ RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */ }; /** * radix_tree_iter_init - initialize radix tree iterator * * @iter: pointer to iterator state * @start: iteration starting index * Returns: NULL */ static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) { /* * Leave iter->tags uninitialized. radix_tree_next_chunk() will fill it * in the case of a successful tagged chunk lookup. If the lookup was * unsuccessful or non-tagged then nobody cares about ->tags. * * Set index to zero to bypass next_index overflow protection. * See the comment in radix_tree_next_chunk() for details. */ iter->index = 0; iter->next_index = start; return NULL; } /** * radix_tree_next_chunk - find next chunk of slots for iteration * * @root: radix tree root * @iter: iterator state * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if there no more left * * This function looks up the next chunk in the radix tree starting from * @iter->next_index. It returns a pointer to the chunk's first slot. * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** * radix_tree_iter_lookup - look up an index in the radix tree * @root: radix tree root * @iter: iterator state * @index: key to look up * * If @index is present in the radix tree, this function returns the slot * containing it and updates @iter to describe the entry. If @index is not * present, it returns NULL. */ static inline void __rcu ** radix_tree_iter_lookup(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); } /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state * * If we iterate over a tree protected only by the RCU lock, a race * against deletion or creation may result in seeing a slot for which * radix_tree_deref_retry() returns true. If so, call this function * and continue the iteration. */ static inline __must_check void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; iter->tags = 0; return NULL; } static inline unsigned long __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) { return iter->index + slots; } /** * radix_tree_iter_resume - resume iterating when the chunk may be invalid * @slot: pointer to current slot * @iter: iterator state * Returns: New slot pointer * * If the iterator needs to release then reacquire a lock, the chunk may * have been invalidated by an insertion or deletion. Call this function * before releasing the lock to continue the iteration from the next index. */ void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter); /** * radix_tree_chunk_size - get current chunk size * * @iter: pointer to radix tree iterator * Returns: current chunk size */ static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; } /** * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * * This function updates @iter->index in the case of a successful lookup. * For tagged lookup it also eats @iter->tags. * * There are several cases where 'slot' can be passed in as NULL to this * function. These cases result from the use of radix_tree_iter_resume() or * radix_tree_iter_retry(). In these cases we don't end up dereferencing * 'slot' because either: * a) we are doing tagged iteration and iter->tags has been set to 0, or * b) we are doing non-tagged iteration, and iter->index and iter->next_index * have been set up so that radix_tree_chunk_size() returns 1 or 0. */ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { iter->tags >>= 1; if (unlikely(!iter->tags)) return NULL; if (likely(iter->tags & 1ul)) { iter->index = __radix_tree_iter_add(iter, 1); slot++; goto found; } if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset++; iter->index = __radix_tree_iter_add(iter, offset); slot += offset; goto found; } } else { long count = radix_tree_chunk_size(iter); while (--count > 0) { slot++; iter->index = __radix_tree_iter_add(iter, 1); if (likely(*slot)) goto found; if (flags & RADIX_TREE_ITER_CONTIG) { /* forbid switching to the next chunk */ iter->next_index = 0; break; } } } return NULL; found: return slot; } /** * radix_tree_for_each_slot - iterate over non-empty slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_slot(slot, root, iter, start) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \ slot = radix_tree_next_slot(slot, iter, 0)) /** * radix_tree_for_each_tagged - iterate over tagged slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * @tag: tag index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_tagged(slot, root, iter, start, tag) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, \ RADIX_TREE_ITER_TAGGED | tag)) ; \ slot = radix_tree_next_slot(slot, iter, \ RADIX_TREE_ITER_TAGGED | tag)) #endif /* _LINUX_RADIX_TREE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 1999-2002 Vojtech Pavlik */ #ifndef _SERIO_H #define _SERIO_H #include <linux/types.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/device.h> #include <linux/mod_devicetable.h> #include <uapi/linux/serio.h> extern struct bus_type serio_bus; struct serio { void *port_data; char name[32]; char phys[32]; char firmware_id[128]; bool manual_bind; struct serio_device_id id; /* Protects critical sections from port's interrupt handler */ spinlock_t lock; int (*write)(struct serio *, unsigned char); int (*open)(struct serio *); void (*close)(struct serio *); int (*start)(struct serio *); void (*stop)(struct serio *); struct serio *parent; /* Entry in parent->children list */ struct list_head child_node; struct list_head children; /* Level of nesting in serio hierarchy */ unsigned int depth; /* * serio->drv is accessed from interrupt handlers; when modifying * caller should acquire serio->drv_mutex and serio->lock. */ struct serio_driver *drv; /* Protects serio->drv so attributes can pin current driver */ struct mutex drv_mutex; struct device dev; struct list_head node; /* * For use by PS/2 layer when several ports share hardware and * may get indigestion when exposed to concurrent access (i8042). */ struct mutex *ps2_cmd_mutex; }; #define to_serio_port(d) container_of(d, struct serio, dev) struct serio_driver { const char *description; const struct serio_device_id *id_table; bool manual_bind; void (*write_wakeup)(struct serio *); irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int); int (*connect)(struct serio *, struct serio_driver *drv); int (*reconnect)(struct serio *); int (*fast_reconnect)(struct serio *); void (*disconnect)(struct serio *); void (*cleanup)(struct serio *); struct device_driver driver; }; #define to_serio_driver(d) container_of(d, struct serio_driver, driver) int serio_open(struct serio *serio, struct serio_driver *drv); void serio_close(struct serio *serio); void serio_rescan(struct serio *serio); void serio_reconnect(struct serio *serio); irqreturn_t serio_interrupt(struct serio *serio, unsigned char data, unsigned int flags); void __serio_register_port(struct serio *serio, struct module *owner); /* use a define to avoid include chaining to get THIS_MODULE */ #define serio_register_port(serio) \ __serio_register_port(serio, THIS_MODULE) void serio_unregister_port(struct serio *serio); void serio_unregister_child_port(struct serio *serio); int __must_check __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name); /* use a define to avoid include chaining to get THIS_MODULE & friends */ #define serio_register_driver(drv) \ __serio_register_driver(drv, THIS_MODULE, KBUILD_MODNAME) void serio_unregister_driver(struct serio_driver *drv); /** * module_serio_driver() - Helper macro for registering a serio driver * @__serio_driver: serio_driver struct * * Helper macro for serio drivers which do not do anything special in * module init/exit. This eliminates a lot of boilerplate. Each module * may only use this macro once, and calling it replaces module_init() * and module_exit(). */ #define module_serio_driver(__serio_driver) \ module_driver(__serio_driver, serio_register_driver, \ serio_unregister_driver) static inline int serio_write(struct serio *serio, unsigned char data) { if (serio->write) return serio->write(serio, data); else return -1; } static inline void serio_drv_write_wakeup(struct serio *serio) { if (serio->drv && serio->drv->write_wakeup) serio->drv->write_wakeup(serio); } /* * Use the following functions to manipulate serio's per-port * driver-specific data. */ static inline void *serio_get_drvdata(struct serio *serio) { return dev_get_drvdata(&serio->dev); } static inline void serio_set_drvdata(struct serio *serio, void *data) { dev_set_drvdata(&serio->dev, data); } /* * Use the following functions to protect critical sections in * driver code from port's interrupt handler */ static inline void serio_pause_rx(struct serio *serio) { spin_lock_irq(&serio->lock); } static inline void serio_continue_rx(struct serio *serio) { spin_unlock_irq(&serio->lock); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H #include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec64 *ts); void pvclock_resume(void); void pvclock_touch_watchdogs(void); static __always_inline unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) { unsigned version = src->version & ~1; /* Make sure that the version is read before the data. */ virt_rmb(); return version; } static __always_inline bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, unsigned version) { /* Make sure that the version is re-read after the data. */ virt_rmb(); return unlikely(version != src->version); } /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) { u64 product; #ifdef __i386__ u32 tmp1, tmp2; #else ulong tmp; #endif if (shift < 0) delta >>= -shift; else delta <<= shift; #ifdef __i386__ __asm__ ( "mul %5 ; " "mov %4,%%eax ; " "mov %%edx,%4 ; " "mul %5 ; " "xor %5,%5 ; " "add %4,%%eax ; " "adc %5,%%edx ; " : "=A" (product), "=r" (tmp1), "=r" (tmp2) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); #elif defined(__x86_64__) __asm__ ( "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" : [lo]"=a"(product), [hi]"=d"(tmp) : "0"(delta), [mul_frac]"rm"((u64)mul_frac)); #else #error implement me! #endif return product; } static __always_inline u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc) { u64 delta = tsc - src->tsc_timestamp; u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); return src->system_time + offset; } struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) #ifdef CONFIG_PARAVIRT_CLOCK void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti); struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void); #else static inline struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) { return NULL; } #endif #endif /* _ASM_X86_PVCLOCK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BLOCKGROUP_LOCK_H #define _LINUX_BLOCKGROUP_LOCK_H /* * Per-blockgroup locking for ext2 and ext3. * * Simple hashed spinlocking. */ #include <linux/spinlock.h> #include <linux/cache.h> #ifdef CONFIG_SMP #define NR_BG_LOCKS (4 << ilog2(NR_CPUS < 32 ? NR_CPUS : 32)) #else #define NR_BG_LOCKS 1 #endif struct bgl_lock { spinlock_t lock; } ____cacheline_aligned_in_smp; struct blockgroup_lock { struct bgl_lock locks[NR_BG_LOCKS]; }; static inline void bgl_lock_init(struct blockgroup_lock *bgl) { int i; for (i = 0; i < NR_BG_LOCKS; i++) spin_lock_init(&bgl->locks[i].lock); } static inline spinlock_t * bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group) { return &bgl->locks[block_group & (NR_BG_LOCKS-1)].lock; } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 // SPDX-License-Identifier: GPL-2.0-only /* * A generic implementation of binary search for the Linux kernel * * Copyright (C) 2008-2009 Ksplice, Inc. * Author: Tim Abbott <tabbott@ksplice.com> */ #include <linux/export.h> #include <linux/bsearch.h> #include <linux/kprobes.h> /* * bsearch - binary search an array of elements * @key: pointer to item being searched for * @base: pointer to first element to search * @num: number of elements * @size: size of each element * @cmp: pointer to comparison function * * This function does a binary search on the given array. The * contents of the array should already be in ascending sorted order * under the provided comparison function. * * Note that the key need not have the same type as the elements in * the array, e.g. key could be a string and the comparison function * could compare the string with the struct's name field. However, if * the key and elements in the array are of the same type, you can use * the same comparison function for both sort() and bsearch(). */ void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) { return __inline_bsearch(key, base, num, size, cmp); } EXPORT_SYMBOL(bsearch); NOKPROBE_SYMBOL(bsearch);
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PID_H #define _LINUX_PID_H #include <linux/rculist.h> #include <linux/wait.h> #include <linux/refcount.h> enum pid_type { PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX, }; /* * What is struct pid? * * A struct pid is the kernel's internal notion of a process identifier. * It refers to individual tasks, process groups, and sessions. While * there are processes attached to it the struct pid lives in a hash * table, so it and then the processes that it refers to can be found * quickly from the numeric pid value. The attached processes may be * quickly accessed by following pointers from struct pid. * * Storing pid_t values in the kernel and referring to them later has a * problem. The process originally with that pid may have exited and the * pid allocator wrapped, and another process could have come along * and been assigned that pid. * * Referring to user space processes by holding a reference to struct * task_struct has a problem. When the user space process exits * the now useless task_struct is still kept. A task_struct plus a * stack consumes around 10K of low kernel memory. More precisely * this is THREAD_SIZE + sizeof(struct task_struct). By comparison * a struct pid is about 64 bytes. * * Holding a reference to struct pid solves both of these problems. * It is small so holding a reference does not consume a lot of * resources, and since a new struct pid is allocated when the numeric pid * value is reused (when pids wrap around) we don't mistakenly refer to new * processes. */ /* * struct upid is used to get the id of the struct pid, as it is * seen in particular namespace. Later the struct pid is found with * find_pid_ns() using the int nr and struct pid_namespace *ns. */ struct upid { int nr; struct pid_namespace *ns; }; struct pid { refcount_t count; unsigned int level; spinlock_t lock; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; /* wait queue for pidfd notifications */ wait_queue_head_t wait_pidfd; struct rcu_head rcu; struct upid numbers[1]; }; extern struct pid init_struct_pid; extern const struct file_operations pidfd_fops; struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); static inline struct pid *get_pid(struct pid *pid) { if (pid) refcount_inc(&pid->count); return pid; } extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); static inline bool pid_has_task(struct pid *pid, enum pid_type type) { return !hlist_empty(&pid->tasks[type]); } extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); /* * these helpers must be called with the tasklist_lock write-held. */ extern void attach_pid(struct task_struct *task, enum pid_type); extern void detach_pid(struct task_struct *task, enum pid_type); extern void change_pid(struct task_struct *task, enum pid_type, struct pid *pid); extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); struct pid_namespace; extern struct pid_namespace init_pid_ns; extern int pid_max; extern int pid_max_min, pid_max_max; /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. * * find_pid_ns() finds the pid in the namespace specified * find_vpid() finds the pid by its virtual id, i.e. in the current namespace * * see also find_task_by_vpid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size); extern void free_pid(struct pid *pid); extern void disable_pid_allocation(struct pid_namespace *ns); /* * ns_of_pid() returns the pid namespace in which the specified pid was * allocated. * * NOTE: * ns_of_pid() is expected to be called for a process (task) that has * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid * is expected to be non-NULL. If @pid is NULL, caller should handle * the resulting NULL pid-ns. */ static inline struct pid_namespace *ns_of_pid(struct pid *pid) { struct pid_namespace *ns = NULL; if (pid) ns = pid->numbers[pid->level].ns; return ns; } /* * is_child_reaper returns true if the pid is the init process * of the current namespace. As this one could be checked before * pid_ns->child_reaper is assigned in copy_process, we check * with the pid number. */ static inline bool is_child_reaper(struct pid *pid) { return pid->numbers[pid->level].nr == 1; } /* * the helpers to get the pid's id seen from different namespaces * * pid_nr() : global id, i.e. the id seen from the init namespace; * pid_vnr() : virtual id, i.e. the id seen from the pid namespace of * current. * pid_nr_ns() : id seen from the ns specified. * * see also task_xid_nr() etc in include/linux/sched.h */ static inline pid_t pid_nr(struct pid *pid) { pid_t nr = 0; if (pid) nr = pid->numbers[0].nr; return nr; } pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pid_links[type]) { /* * Both old and new leaders may be attached to * the same pid in the middle of de_thread(). */ #define while_each_pid_task(pid, type, task) \ if (type == PIDTYPE_PID) \ break; \ } \ } while (0) #define do_each_pid_thread(pid, type, task) \ do_each_pid_task(pid, type, task) { \ struct task_struct *tg___ = task; \ for_each_thread(tg___, task) { #define while_each_pid_thread(pid, type, task) \ } \ task = tg___; \ } while_each_pid_task(pid, type, task) #endif /* _LINUX_PID_H */
1 2 3 4 5 6 7 8 9 10 11 12 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MSDOS_FS_H #define _LINUX_MSDOS_FS_H #include <uapi/linux/msdos_fs.h> /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } #endif /* !_LINUX_MSDOS_FS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. NET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Ethernet handlers. * * Version: @(#)eth.h 1.0.4 05/13/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Relocated to include/linux where it belongs by Alan Cox * <gw4pts@gw4pts.ampr.org> */ #ifndef _LINUX_ETHERDEVICE_H #define _LINUX_ETHERDEVICE_H #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/crc32.h> #include <asm/unaligned.h> #include <asm/bitsperlong.h> #ifdef __KERNEL__ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); __be16 eth_header_parse_protocol(const struct sk_buff *skb); int eth_prepare_mac_addr_change(struct net_device *dev, void *p); void eth_commit_mac_addr_change(struct net_device *dev, void *p); int eth_mac_addr(struct net_device *dev, void *p); int eth_validate_addr(struct net_device *dev); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs); #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, unsigned int txqs, unsigned int rxqs); #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1) struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb); int eth_gro_complete(struct sk_buff *skb, int nhoff); /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; #define eth_stp_addr eth_reserved_addr_base /** * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. */ static inline bool is_link_local_ether_addr(const u8 *addr) { __be16 *a = (__be16 *)addr; static const __be16 *b = (const __be16 *)eth_reserved_addr_base; static const __be16 m = cpu_to_be16(0xfff0); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return (((*(const u32 *)addr) ^ (*(const u32 *)b)) | (__force int)((a[2] ^ b[2]) & m)) == 0; #else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; #endif } /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ static inline bool is_zero_ether_addr(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return ((*(const u32 *)addr) | (*(const u16 *)(addr + 4))) == 0; #else return (*(const u16 *)(addr + 0) | *(const u16 *)(addr + 2) | *(const u16 *)(addr + 4)) == 0; #endif } /** * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) u32 a = *(const u32 *)addr; #else u16 a = *(const u16 *)addr; #endif #ifdef __BIG_ENDIAN return 0x01 & (a >> ((sizeof(a) * 8) - 8)); #else return 0x01 & a; #endif } static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN return 0x01 & ((*(const u64 *)addr) >> 56); #else return 0x01 & (*(const u64 *)addr); #endif #else return is_multicast_ether_addr(addr); #endif } /** * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { return 0x02 & addr[0]; } /** * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ static inline bool is_broadcast_ether_addr(const u8 *addr) { return (*(const u16 *)(addr + 0) & *(const u16 *)(addr + 2) & *(const u16 *)(addr + 4)) == 0xffff; } /** * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a unicast address. */ static inline bool is_unicast_ether_addr(const u8 *addr) { return !is_multicast_ether_addr(addr); } /** * is_valid_ether_addr - Determine if the given Ethernet address is valid * @addr: Pointer to a six-byte array containing the Ethernet address * * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * * Return true if the address is valid. * * Please note: addr must be aligned to u16. */ static inline bool is_valid_ether_addr(const u8 *addr) { /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to * explicitly check for it here. */ return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } /** * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol * @proto: Ethertype/length value to be tested * * Check that the value from the Ethertype/length field is a valid Ethertype. * * Return true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { #ifndef __BIG_ENDIAN /* if CPU is little endian mask off bits representing LSB */ proto &= htons(0xFF00); #endif /* cast both to u16 and compare since LSB can be ignored */ return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); } /** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address * * Generate a random Ethernet address (MAC) that is not multicast * and has the local assigned bit set. */ static inline void eth_random_addr(u8 *addr) { get_random_bytes(addr, ETH_ALEN); addr[0] &= 0xfe; /* clear multicast bit */ addr[0] |= 0x02; /* set local assignment bit (IEEE802) */ } #define random_ether_addr(addr) eth_random_addr(addr) /** * eth_broadcast_addr - Assign broadcast address * @addr: Pointer to a six-byte array containing the Ethernet address * * Assign the broadcast address to the given address array. */ static inline void eth_broadcast_addr(u8 *addr) { memset(addr, 0xff, ETH_ALEN); } /** * eth_zero_addr - Assign zero address * @addr: Pointer to a six-byte array containing the Ethernet address * * Assign the zero address to the given address array. */ static inline void eth_zero_addr(u8 *addr) { memset(addr, 0x00, ETH_ALEN); } /** * eth_hw_addr_random - Generate software assigned random Ethernet and * set device flag * @dev: pointer to net_device structure * * Generate a random Ethernet address (MAC) to be used by a net device * and set addr_assign_type so the state can be read by sysfs and be * used by userspace. */ static inline void eth_hw_addr_random(struct net_device *dev) { dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->dev_addr); } /** * eth_hw_addr_crc - Calculate CRC from netdev_hw_addr * @ha: pointer to hardware address * * Calculate CRC from a hardware address as basis for filter hashes. */ static inline u32 eth_hw_addr_crc(struct netdev_hw_addr *ha) { return ether_crc(ETH_ALEN, ha->addr); } /** * ether_addr_copy - Copy an Ethernet address * @dst: Pointer to a six-byte array Ethernet address destination * @src: Pointer to a six-byte array Ethernet address source * * Please note: dst & src must both be aligned to u16. */ static inline void ether_addr_copy(u8 *dst, const u8 *src) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) *(u32 *)dst = *(const u32 *)src; *(u16 *)(dst + 4) = *(const u16 *)(src + 4); #else u16 *a = (u16 *)dst; const u16 *b = (const u16 *)src; a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; #endif } /** * eth_hw_addr_inherit - Copy dev_addr from another net_device * @dst: pointer to net_device to copy dev_addr to * @src: pointer to net_device to copy dev_addr from * * Copy the Ethernet address from one net_device to another along with * the address attributes (addr_assign_type). */ static inline void eth_hw_addr_inherit(struct net_device *dst, struct net_device *src) { dst->addr_assign_type = src->addr_assign_type; ether_addr_copy(dst->dev_addr, src->dev_addr); } /** * ether_addr_equal - Compare two Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address * * Compare two Ethernet addresses, returns true if equal * * Please note: addr1 & addr2 must both be aligned to u16. */ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); return fold == 0; #else const u16 *a = (const u16 *)addr1; const u16 *b = (const u16 *)addr2; return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } /** * ether_addr_equal_64bits - Compare two Ethernet addresses * @addr1: Pointer to an array of 8 bytes * @addr2: Pointer to an other array of 8 bytes * * Compare two Ethernet addresses, returns true if equal, false otherwise. * * The function doesn't need any conditional branches and possibly uses * word memory accesses on CPU allowing cheap unaligned memory reads. * arrays = { byte1, byte2, byte3, byte4, byte5, byte6, pad1, pad2 } * * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. */ static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], const u8 addr2[6+2]) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); #ifdef __BIG_ENDIAN return (fold >> 16) == 0; #else return (fold << 16) == 0; #endif #else return ether_addr_equal(addr1, addr2); #endif } /** * ether_addr_equal_unaligned - Compare two not u16 aligned Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address * * Compare two Ethernet addresses, returns true if equal * * Please note: Use only when any Ethernet address may not be u16 aligned. */ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return ether_addr_equal(addr1, addr2); #else return memcmp(addr1, addr2, ETH_ALEN) == 0; #endif } /** * ether_addr_equal_masked - Compare two Ethernet addresses with a mask * @addr1: Pointer to a six-byte array containing the 1st Ethernet address * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address * @mask: Pointer to a six-byte array containing the Ethernet address bitmask * * Compare two Ethernet addresses with a mask, returns true if for every bit * set in the bitmask the equivalent bits in the ethernet addresses are equal. * Using a mask with all bits set is a slower ether_addr_equal. */ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, const u8 *mask) { int i; for (i = 0; i < ETH_ALEN; i++) { if ((addr1[i] ^ addr2[i]) & mask[i]) return false; } return true; } /** * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { u64 u = 0; int i; for (i = 0; i < ETH_ALEN; i++) u = u << 8 | addr[i]; return u; } /** * u64_to_ether_addr - Convert a u64 to an Ethernet address. * @u: u64 to convert to an Ethernet MAC address * @addr: Pointer to a six-byte array to contain the Ethernet address */ static inline void u64_to_ether_addr(u64 u, u8 *addr) { int i; for (i = ETH_ALEN - 1; i >= 0; i--) { addr[i] = u & 0xff; u = u >> 8; } } /** * eth_addr_dec - Decrement the given MAC address * * @addr: Pointer to a six-byte array containing Ethernet address to decrement */ static inline void eth_addr_dec(u8 *addr) { u64 u = ether_addr_to_u64(addr); u--; u64_to_ether_addr(u, addr); } /** * eth_addr_inc() - Increment the given MAC address. * @addr: Pointer to a six-byte array containing Ethernet address to increment. */ static inline void eth_addr_inc(u8 *addr) { u64 u = ether_addr_to_u64(addr); u++; u64_to_ether_addr(u, addr); } /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure * @addr: Pointer to a six-byte array containing the Ethernet address * * Compare passed address with all addresses of the device. Return true if the * address if one of the device addresses. * * Note that this function calls ether_addr_equal_64bits() so take care of * the right padding. */ static inline bool is_etherdev_addr(const struct net_device *dev, const u8 addr[6 + 2]) { struct netdev_hw_addr *ha; bool res = false; rcu_read_lock(); for_each_dev_addr(dev, ha) { res = ether_addr_equal_64bits(addr, ha->addr); if (res) break; } rcu_read_unlock(); return res; } #endif /* __KERNEL__ */ /** * compare_ether_header - Compare two Ethernet headers * @a: Pointer to Ethernet header * @b: Pointer to Ethernet header * * Compare two Ethernet headers, returns 0 if equal. * This assumes that the network header (i.e., IP header) is 4-byte * aligned OR the platform can handle unaligned access. This is the * case for all packets coming into netif_receive_skb or similar * entry points. */ static inline unsigned long compare_ether_header(const void *a, const void *b) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 unsigned long fold; /* * We want to compare 14 bytes: * [a0 ... a13] ^ [b0 ... b13] * Use two long XOR, ORed together, with an overlap of two bytes. * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] * This means the [a6 a7] ^ [b6 b7] part is done two times. */ fold = *(unsigned long *)a ^ *(unsigned long *)b; fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); return fold; #else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); #endif } /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. This function * takes short frames and pads them with zeros up to the 60 byte limit. */ static inline int eth_skb_pad(struct sk_buff *skb) { return skb_put_padto(skb, ETH_ZLEN); } #endif /* _LINUX_ETHERDEVICE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 /* SPDX-License-Identifier: GPL-2.0-only */ /* * IEEE802.15.4-2003 specification * * Copyright (C) 2007, 2008 Siemens AG * * Written by: * Pavel Smolenskiy <pavel.smolenskiy@gmail.com> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Maxim Osipov <maxim.osipov@siemens.com> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #ifndef LINUX_IEEE802154_H #define LINUX_IEEE802154_H #include <linux/types.h> #include <linux/random.h> #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 #define IEEE802154_MAX_AUTH_TAG_LEN 16 #define IEEE802154_FC_LEN 2 #define IEEE802154_SEQ_LEN 1 /* General MAC frame format: * 2 bytes: Frame Control * 1 byte: Sequence Number * 20 bytes: Addressing fields * 14 bytes: Auxiliary Security Header */ #define IEEE802154_MAX_HEADER_LEN (2 + 1 + 20 + 14) #define IEEE802154_MIN_HEADER_LEN (IEEE802154_ACK_PSDU_LEN - \ IEEE802154_FCS_LEN) #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 #define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_PAN_ID_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 #define IEEE802154_MAX_SIFS_FRAME_SIZE 18 #define IEEE802154_MAX_CHANNEL 26 #define IEEE802154_MAX_PAGE 31 #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ #define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ #define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ #define IEEE802154_FC_TYPE_SHIFT 0 #define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) #define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) #define IEEE802154_FC_SET_TYPE(v, x) do { \ v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ } while (0) #define IEEE802154_FC_SECEN_SHIFT 3 #define IEEE802154_FC_SECEN (1 << IEEE802154_FC_SECEN_SHIFT) #define IEEE802154_FC_FRPEND_SHIFT 4 #define IEEE802154_FC_FRPEND (1 << IEEE802154_FC_FRPEND_SHIFT) #define IEEE802154_FC_ACK_REQ_SHIFT 5 #define IEEE802154_FC_ACK_REQ (1 << IEEE802154_FC_ACK_REQ_SHIFT) #define IEEE802154_FC_INTRA_PAN_SHIFT 6 #define IEEE802154_FC_INTRA_PAN (1 << IEEE802154_FC_INTRA_PAN_SHIFT) #define IEEE802154_FC_SAMODE_SHIFT 14 #define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) #define IEEE802154_FC_DAMODE_SHIFT 10 #define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) #define IEEE802154_FC_VERSION_SHIFT 12 #define IEEE802154_FC_VERSION_MASK (3 << IEEE802154_FC_VERSION_SHIFT) #define IEEE802154_FC_VERSION(x) ((x & IEEE802154_FC_VERSION_MASK) >> IEEE802154_FC_VERSION_SHIFT) #define IEEE802154_FC_SAMODE(x) \ (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) #define IEEE802154_FC_DAMODE(x) \ (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) #define IEEE802154_SCF_SECLEVEL_MASK 7 #define IEEE802154_SCF_SECLEVEL_SHIFT 0 #define IEEE802154_SCF_SECLEVEL(x) (x & IEEE802154_SCF_SECLEVEL_MASK) #define IEEE802154_SCF_KEY_ID_MODE_SHIFT 3 #define IEEE802154_SCF_KEY_ID_MODE_MASK (3 << IEEE802154_SCF_KEY_ID_MODE_SHIFT) #define IEEE802154_SCF_KEY_ID_MODE(x) \ ((x & IEEE802154_SCF_KEY_ID_MODE_MASK) >> IEEE802154_SCF_KEY_ID_MODE_SHIFT) #define IEEE802154_SCF_KEY_IMPLICIT 0 #define IEEE802154_SCF_KEY_INDEX 1 #define IEEE802154_SCF_KEY_SHORT_INDEX 2 #define IEEE802154_SCF_KEY_HW_INDEX 3 #define IEEE802154_SCF_SECLEVEL_NONE 0 #define IEEE802154_SCF_SECLEVEL_MIC32 1 #define IEEE802154_SCF_SECLEVEL_MIC64 2 #define IEEE802154_SCF_SECLEVEL_MIC128 3 #define IEEE802154_SCF_SECLEVEL_ENC 4 #define IEEE802154_SCF_SECLEVEL_ENC_MIC32 5 #define IEEE802154_SCF_SECLEVEL_ENC_MIC64 6 #define IEEE802154_SCF_SECLEVEL_ENC_MIC128 7 /* MAC footer size */ #define IEEE802154_MFR_SIZE 2 /* 2 octets */ /* MAC's Command Frames Identifiers */ #define IEEE802154_CMD_ASSOCIATION_REQ 0x01 #define IEEE802154_CMD_ASSOCIATION_RESP 0x02 #define IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 #define IEEE802154_CMD_DATA_REQ 0x04 #define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 #define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 #define IEEE802154_CMD_BEACON_REQ 0x07 #define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 #define IEEE802154_CMD_GTS_REQ 0x09 /* * The return values of MAC operations */ enum { /* * The requested operation was completed successfully. * For a transmission request, this value indicates * a successful transmission. */ IEEE802154_SUCCESS = 0x0, /* The beacon was lost following a synchronization request. */ IEEE802154_BEACON_LOSS = 0xe0, /* * A transmission could not take place due to activity on the * channel, i.e., the CSMA-CA mechanism has failed. */ IEEE802154_CHNL_ACCESS_FAIL = 0xe1, /* The GTS request has been denied by the PAN coordinator. */ IEEE802154_DENINED = 0xe2, /* The attempt to disable the transceiver has failed. */ IEEE802154_DISABLE_TRX_FAIL = 0xe3, /* * The received frame induces a failed security check according to * the security suite. */ IEEE802154_FAILED_SECURITY_CHECK = 0xe4, /* * The frame resulting from secure processing has a length that is * greater than aMACMaxFrameSize. */ IEEE802154_FRAME_TOO_LONG = 0xe5, /* * The requested GTS transmission failed because the specified GTS * either did not have a transmit GTS direction or was not defined. */ IEEE802154_INVALID_GTS = 0xe6, /* * A request to purge an MSDU from the transaction queue was made using * an MSDU handle that was not found in the transaction table. */ IEEE802154_INVALID_HANDLE = 0xe7, /* A parameter in the primitive is out of the valid range.*/ IEEE802154_INVALID_PARAMETER = 0xe8, /* No acknowledgment was received after aMaxFrameRetries. */ IEEE802154_NO_ACK = 0xe9, /* A scan operation failed to find any network beacons.*/ IEEE802154_NO_BEACON = 0xea, /* No response data were available following a request. */ IEEE802154_NO_DATA = 0xeb, /* The operation failed because a short address was not allocated. */ IEEE802154_NO_SHORT_ADDRESS = 0xec, /* * A receiver enable request was unsuccessful because it could not be * completed within the CAP. */ IEEE802154_OUT_OF_CAP = 0xed, /* * A PAN identifier conflict has been detected and communicated to the * PAN coordinator. */ IEEE802154_PANID_CONFLICT = 0xee, /* A coordinator realignment command has been received. */ IEEE802154_REALIGMENT = 0xef, /* The transaction has expired and its information discarded. */ IEEE802154_TRANSACTION_EXPIRED = 0xf0, /* There is no capacity to store the transaction. */ IEEE802154_TRANSACTION_OVERFLOW = 0xf1, /* * The transceiver was in the transmitter enabled state when the * receiver was requested to be enabled. */ IEEE802154_TX_ACTIVE = 0xf2, /* The appropriate key is not available in the ACL. */ IEEE802154_UNAVAILABLE_KEY = 0xf3, /* * A SET/GET request was issued with the identifier of a PIB attribute * that is not supported. */ IEEE802154_UNSUPPORTED_ATTR = 0xf4, /* * A request to perform a scan operation failed because the MLME was * in the process of performing a previously initiated scan operation. */ IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FCTL_DADDR 0x0c00 #define IEEE802154_FCTL_SADDR 0xc000 #define IEEE802154_FTYPE_DATA 0x0001 #define IEEE802154_FCTL_ADDR_NONE 0x0000 #define IEEE802154_FCTL_DADDR_SHORT 0x0800 #define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 #define IEEE802154_FCTL_SADDR_SHORT 0x8000 #define IEEE802154_FCTL_SADDR_EXTENDED 0xc000 /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder */ static inline int ieee802154_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE802154_FCTL_FTYPE)) == cpu_to_le16(IEEE802154_FTYPE_DATA); } /** * ieee802154_is_secen - check if Security bit is set * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_secen(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_SECEN); } /** * ieee802154_is_ackreq - check if acknowledgment request bit is set * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_ackreq(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_ACKREQ); } /** * ieee802154_is_intra_pan - check if intra pan id communication * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_intra_pan(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); } /* * ieee802154_daddr_mode - get daddr mode from fc * @fc: frame control bytes in little-endian byteorder */ static inline __le16 ieee802154_daddr_mode(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); } /* * ieee802154_saddr_mode - get saddr mode from fc * @fc: frame control bytes in little-endian byteorder */ static inline __le16 ieee802154_saddr_mode(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_SADDR); } /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: * 0-4 Reserved * 5 MPDU (Acknowledgment) * 6-8 Reserved * 9-127 MPDU * * @len: psdu len with (MHR + payload + MFR) */ static inline bool ieee802154_is_valid_psdu_len(u8 len) { return (len == IEEE802154_ACK_PSDU_LEN || (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** * ieee802154_is_valid_extended_unicast_addr - check if extended addr is valid * @addr: extended addr to check */ static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) { /* Bail out if the address is all zero, or if the group * address bit is set. */ return ((addr != cpu_to_le64(0x0000000000000000ULL)) && !(addr & cpu_to_le64(0x0100000000000000ULL))); } /** * ieee802154_is_broadcast_short_addr - check if short addr is broadcast * @addr: short addr to check */ static inline bool ieee802154_is_broadcast_short_addr(__le16 addr) { return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)); } /** * ieee802154_is_unspec_short_addr - check if short addr is unspecified * @addr: short addr to check */ static inline bool ieee802154_is_unspec_short_addr(__le16 addr) { return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)); } /** * ieee802154_is_valid_src_short_addr - check if source short address is valid * @addr: short addr to check */ static inline bool ieee802154_is_valid_src_short_addr(__le16 addr) { return !(ieee802154_is_broadcast_short_addr(addr) || ieee802154_is_unspec_short_addr(addr)); } /** * ieee802154_random_extended_addr - generates a random extended address * @addr: extended addr pointer to place the random address */ static inline void ieee802154_random_extended_addr(__le64 *addr) { get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); /* clear the group bit, and set the locally administered bit */ ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01; ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02; } #endif /* LINUX_IEEE802154_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H #include <linux/types.h> struct kmem_cache; struct page; struct vm_struct; struct task_struct; #ifdef CONFIG_KASAN #include <linux/pgtable.h> #include <asm/kasan.h> /* kasan_data struct is used in KUnit tests for KASAN expected failures */ struct kunit_kasan_expectation { bool report_expected; bool report_found; }; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(void *ptr, unsigned long ip); void kasan_poison_kfree(void *ptr, unsigned long ip); void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void * __must_check kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); struct kasan_cache { int alloc_meta_offset; int free_meta_offset; }; /* * These functions provide a special case to support backing module * allocations with real shadow memory. With KASAN vmalloc, the special * case is unnecessary, as the work is handled in the generic case. */ #ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); #else static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { kasan_unpoison_shadow(ptr, __ksize(ptr)); } size_t kasan_metadata_size(struct kmem_cache *cache); bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} static inline void *kasan_init_slab_obj(struct kmem_cache *cache, const void *object) { return (void *)object; } static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) { return ptr; } static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { return object; } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip) { return false; } static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; } static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ #ifdef CONFIG_KASAN_GENERIC #define KASAN_SHADOW_INIT 0 void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); #else /* CONFIG_KASAN_GENERIC */ static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_SHADOW_INIT 0xFF void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); #else /* CONFIG_KASAN_SW_TAGS */ static inline void kasan_init_tags(void) { } static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_poison_vmalloc(const void *start, unsigned long size); void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); #else static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) { } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) {} #endif #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); #else /* CONFIG_KASAN_INLINE */ static inline void kasan_non_canonical_hook(unsigned long addr) { } #endif /* CONFIG_KASAN_INLINE */ #endif /* LINUX_KASAN_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 /* SPDX-License-Identifier: GPL-2.0 */ /* * Generic nexthop implementation * * Copyright (c) 2017-19 Cumulus Networks * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> */ #ifndef __LINUX_NEXTHOP_H #define __LINUX_NEXTHOP_H #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/route.h> #include <linux/types.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/netlink.h> #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK struct nexthop; struct nh_config { u32 nh_id; u8 nh_family; u8 nh_protocol; u8 nh_blackhole; u8 nh_fdb; u32 nh_flags; int nh_ifindex; struct net_device *dev; union { __be32 ipv4; struct in6_addr ipv6; } gw; struct nlattr *nh_grp; u16 nh_grp_type; struct nlattr *nh_encap; u16 nh_encap_type; u32 nlflags; struct nl_info nlinfo; }; struct nh_info { struct hlist_node dev_hash; /* entry on netns devhash */ struct nexthop *nh_parent; u8 family; bool reject_nh; bool fdb_nh; union { struct fib_nh_common fib_nhc; struct fib_nh fib_nh; struct fib6_nh fib6_nh; }; }; struct nh_grp_entry { struct nexthop *nh; u8 weight; atomic_t upper_bound; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ }; struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool mpath; bool fdb_nh; bool has_v4; struct nh_grp_entry nh_entries[]; }; struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; u32 id; u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; refcount_t refcnt; struct rcu_head rcu; union { struct nh_info __rcu *nh_info; struct nh_group __rcu *nh_grp; }; }; enum nexthop_event_type { NEXTHOP_EVENT_DEL }; int register_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); void nexthop_free_rcu(struct rcu_head *head); static inline bool nexthop_get(struct nexthop *nh) { return refcount_inc_not_zero(&nh->refcnt); } static inline void nexthop_put(struct nexthop *nh) { if (refcount_dec_and_test(&nh->refcnt)) call_rcu(&nh->rcu, nexthop_free_rcu); } static inline bool nexthop_cmp(const struct nexthop *nh1, const struct nexthop *nh2) { return nh1 == nh2; } static inline bool nexthop_is_fdb(const struct nexthop *nh) { if (nh->is_group) { const struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->fdb_nh; } else { const struct nh_info *nhi; nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->fdb_nh; } } static inline bool nexthop_has_v4(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->has_v4; } return false; } static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->mpath; } return false; } struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); static inline unsigned int nexthop_num_path(const struct nexthop *nh) { unsigned int rc = 1; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) rc = nh_grp->num_nh; } return rc; } static inline struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) { /* for_nexthops macros in fib_semantics.c grabs a pointer to * the nexthop before checking nhsel */ if (nhsel >= nhg->num_nh) return NULL; return nhg->nh_entries[nhsel].nh; } static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } return 0; } /* called with rcu lock */ static inline bool nexthop_is_blackhole(const struct nexthop *nh) { const struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->num_nh > 1) return false; nh = nh_grp->nh_entries[0].nh; } nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->reject_nh; } static inline void nexthop_path_fib_result(struct fib_result *res, int hash) { struct nh_info *nhi; struct nexthop *nh; nh = nexthop_select_path(res->fi->nh, hash); nhi = rcu_dereference(nh->nh_info); res->nhc = &nhi->fib_nhc; } /* called with rcu read lock or rtnl held */ static inline struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) { struct nh_info *nhi; BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; } } nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } /* called from fib_table_lookup with rcu_lock */ static inline struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, int fib_flags, const struct flowi4 *flp, int *nhsel) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = i; return &nhi->fib_nhc; } } } else { nhi = rcu_dereference(nh->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = 0; return &nhi->fib_nhc; } } return NULL; } static inline bool nexthop_uses_dev(const struct nexthop *nh, const struct net_device *dev) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } } else { nhi = rcu_dereference(nh->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } return false; } static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) return nexthop_num_path(fi->nh); return fi->fib_nhs; } int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack); static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) { if (unlikely(fi->nh)) return nexthop_fib_nhc(fi->nh, nhsel); return &fi->fib_nh[nhsel].nh_common; } /* only used when fib_nh is built into fib_info */ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) { WARN_ON(fi->nh); return &fi->fib_nh[nhsel]; } /* * IPv6 variants */ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack); /* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } /* Variant of nexthop_fib6_nh(). * Caller should either hold rcu_read_lock_bh(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_bh_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; return fib6_nh->fib_nh_dev; } static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) { struct nexthop *nh = res->f6i->nh; struct nh_info *nhi; nh = nexthop_select_path(nh, hash); nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->reject_nh) { res->fib6_type = RTN_BLACKHOLE; res->fib6_flags |= RTF_REJECT; res->nh = nexthop_fib6_nh(nh); } else { res->nh = &nhi->fib6_nh; } } int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); static inline int nexthop_get_family(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->family; } static inline struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, int hash) { struct nh_info *nhi; struct nexthop *nhp; nhp = nexthop_select_path(nh, hash); if (unlikely(!nhp)) return NULL; nhi = rcu_dereference(nhp->nh_info); return &nhi->fib_nhc; } #endif
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 // SPDX-License-Identifier: GPL-2.0 /* * preemptoff and irqoff tracepoints * * Copyright (C) Joel Fernandes (Google) <joel@joelfernandes.org> */ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ftrace.h> #include <linux/kprobes.h> #include "trace.h" #define CREATE_TRACE_POINTS #include <trace/events/preemptirq.h> #ifdef CONFIG_TRACE_IRQFLAGS /* Per-cpu variable to prevent redundant calls when IRQs already off */ static DEFINE_PER_CPU(int, tracing_irq_cpu); /* * Like trace_hardirqs_on() but without the lockdep invocation. This is * used in the low level entry code where the ordering vs. RCU is important * and lockdep uses a staged approach which splits the lockdep hardirq * tracking into a RCU on and a RCU off section. */ void trace_hardirqs_on_prepare(void) { if (this_cpu_read(tracing_irq_cpu)) { if (!in_nmi()) trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } } EXPORT_SYMBOL(trace_hardirqs_on_prepare); NOKPROBE_SYMBOL(trace_hardirqs_on_prepare); void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { if (!in_nmi()) trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } lockdep_hardirqs_on_prepare(CALLER_ADDR0); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on); NOKPROBE_SYMBOL(trace_hardirqs_on); /* * Like trace_hardirqs_off() but without the lockdep invocation. This is * used in the low level entry code where the ordering vs. RCU is important * and lockdep uses a staged approach which splits the lockdep hardirq * tracking into a RCU on and a RCU off section. */ void trace_hardirqs_off_finish(void) { if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); if (!in_nmi()) trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); } } EXPORT_SYMBOL(trace_hardirqs_off_finish); NOKPROBE_SYMBOL(trace_hardirqs_off_finish); void trace_hardirqs_off(void) { lockdep_hardirqs_off(CALLER_ADDR0); if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); } } EXPORT_SYMBOL(trace_hardirqs_off); NOKPROBE_SYMBOL(trace_hardirqs_off); __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { if (this_cpu_read(tracing_irq_cpu)) { if (!in_nmi()) trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); tracer_hardirqs_on(CALLER_ADDR0, caller_addr); this_cpu_write(tracing_irq_cpu, 0); } lockdep_hardirqs_on_prepare(CALLER_ADDR0); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); NOKPROBE_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { lockdep_hardirqs_off(CALLER_ADDR0); if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, caller_addr); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); } } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_TRACE_PREEMPT_TOGGLE void trace_preempt_on(unsigned long a0, unsigned long a1) { if (!in_nmi()) trace_preempt_enable_rcuidle(a0, a1); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { if (!in_nmi()) trace_preempt_disable_rcuidle(a0, a1); tracer_preempt_off(a0, a1); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> Copyright (C) 2010 Google Inc. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #ifndef __L2CAP_H #define __L2CAP_H #include <asm/unaligned.h> #include <linux/atomic.h> /* L2CAP defaults */ #define L2CAP_DEFAULT_MTU 672 #define L2CAP_DEFAULT_MIN_MTU 48 #define L2CAP_DEFAULT_FLUSH_TO 0xFFFF #define L2CAP_EFS_DEFAULT_FLUSH_TO 0xFFFFFFFF #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 #define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ #define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF #define L2CAP_DEFAULT_SDU_ITIME 0xFFFFFFFF #define L2CAP_DEFAULT_ACC_LAT 0xFFFFFFFF #define L2CAP_BREDR_MAX_PAYLOAD 1019 /* 3-DH5 packet */ #define L2CAP_LE_MIN_MTU 23 #define L2CAP_ECRED_CONN_SCID_MAX 5 #define L2CAP_DISC_TIMEOUT msecs_to_jiffies(100) #define L2CAP_DISC_REJ_TIMEOUT msecs_to_jiffies(5000) #define L2CAP_ENC_TIMEOUT msecs_to_jiffies(5000) #define L2CAP_CONN_TIMEOUT msecs_to_jiffies(40000) #define L2CAP_INFO_TIMEOUT msecs_to_jiffies(4000) #define L2CAP_MOVE_TIMEOUT msecs_to_jiffies(4000) #define L2CAP_MOVE_ERTX_TIMEOUT msecs_to_jiffies(60000) #define L2CAP_WAIT_ACK_POLL_PERIOD msecs_to_jiffies(200) #define L2CAP_WAIT_ACK_TIMEOUT msecs_to_jiffies(10000) #define L2CAP_A2MP_DEFAULT_MTU 670 /* L2CAP socket address */ struct sockaddr_l2 { sa_family_t l2_family; __le16 l2_psm; bdaddr_t l2_bdaddr; __le16 l2_cid; __u8 l2_bdaddr_type; }; /* L2CAP socket options */ #define L2CAP_OPTIONS 0x01 struct l2cap_options { __u16 omtu; __u16 imtu; __u16 flush_to; __u8 mode; __u8 fcs; __u8 max_tx; __u16 txwin_size; }; #define L2CAP_CONNINFO 0x02 struct l2cap_conninfo { __u16 hci_handle; __u8 dev_class[3]; }; #define L2CAP_LM 0x03 #define L2CAP_LM_MASTER 0x0001 #define L2CAP_LM_AUTH 0x0002 #define L2CAP_LM_ENCRYPT 0x0004 #define L2CAP_LM_TRUSTED 0x0008 #define L2CAP_LM_RELIABLE 0x0010 #define L2CAP_LM_SECURE 0x0020 #define L2CAP_LM_FIPS 0x0040 /* L2CAP command codes */ #define L2CAP_COMMAND_REJ 0x01 #define L2CAP_CONN_REQ 0x02 #define L2CAP_CONN_RSP 0x03 #define L2CAP_CONF_REQ 0x04 #define L2CAP_CONF_RSP 0x05 #define L2CAP_DISCONN_REQ 0x06 #define L2CAP_DISCONN_RSP 0x07 #define L2CAP_ECHO_REQ 0x08 #define L2CAP_ECHO_RSP 0x09 #define L2CAP_INFO_REQ 0x0a #define L2CAP_INFO_RSP 0x0b #define L2CAP_CREATE_CHAN_REQ 0x0c #define L2CAP_CREATE_CHAN_RSP 0x0d #define L2CAP_MOVE_CHAN_REQ 0x0e #define L2CAP_MOVE_CHAN_RSP 0x0f #define L2CAP_MOVE_CHAN_CFM 0x10 #define L2CAP_MOVE_CHAN_CFM_RSP 0x11 #define L2CAP_CONN_PARAM_UPDATE_REQ 0x12 #define L2CAP_CONN_PARAM_UPDATE_RSP 0x13 #define L2CAP_LE_CONN_REQ 0x14 #define L2CAP_LE_CONN_RSP 0x15 #define L2CAP_LE_CREDITS 0x16 #define L2CAP_ECRED_CONN_REQ 0x17 #define L2CAP_ECRED_CONN_RSP 0x18 #define L2CAP_ECRED_RECONF_REQ 0x19 #define L2CAP_ECRED_RECONF_RSP 0x1a /* L2CAP extended feature mask */ #define L2CAP_FEAT_FLOWCTL 0x00000001 #define L2CAP_FEAT_RETRANS 0x00000002 #define L2CAP_FEAT_BIDIR_QOS 0x00000004 #define L2CAP_FEAT_ERTM 0x00000008 #define L2CAP_FEAT_STREAMING 0x00000010 #define L2CAP_FEAT_FCS 0x00000020 #define L2CAP_FEAT_EXT_FLOW 0x00000040 #define L2CAP_FEAT_FIXED_CHAN 0x00000080 #define L2CAP_FEAT_EXT_WINDOW 0x00000100 #define L2CAP_FEAT_UCD 0x00000200 /* L2CAP checksum option */ #define L2CAP_FCS_NONE 0x00 #define L2CAP_FCS_CRC16 0x01 /* L2CAP fixed channels */ #define L2CAP_FC_SIG_BREDR 0x02 #define L2CAP_FC_CONNLESS 0x04 #define L2CAP_FC_A2MP 0x08 #define L2CAP_FC_ATT 0x10 #define L2CAP_FC_SIG_LE 0x20 #define L2CAP_FC_SMP_LE 0x40 #define L2CAP_FC_SMP_BREDR 0x80 /* L2CAP Control Field bit masks */ #define L2CAP_CTRL_SAR 0xC000 #define L2CAP_CTRL_REQSEQ 0x3F00 #define L2CAP_CTRL_TXSEQ 0x007E #define L2CAP_CTRL_SUPERVISE 0x000C #define L2CAP_CTRL_RETRANS 0x0080 #define L2CAP_CTRL_FINAL 0x0080 #define L2CAP_CTRL_POLL 0x0010 #define L2CAP_CTRL_FRAME_TYPE 0x0001 /* I- or S-Frame */ #define L2CAP_CTRL_TXSEQ_SHIFT 1 #define L2CAP_CTRL_SUPER_SHIFT 2 #define L2CAP_CTRL_POLL_SHIFT 4 #define L2CAP_CTRL_FINAL_SHIFT 7 #define L2CAP_CTRL_REQSEQ_SHIFT 8 #define L2CAP_CTRL_SAR_SHIFT 14 /* L2CAP Extended Control Field bit mask */ #define L2CAP_EXT_CTRL_TXSEQ 0xFFFC0000 #define L2CAP_EXT_CTRL_SAR 0x00030000 #define L2CAP_EXT_CTRL_SUPERVISE 0x00030000 #define L2CAP_EXT_CTRL_REQSEQ 0x0000FFFC #define L2CAP_EXT_CTRL_POLL 0x00040000 #define L2CAP_EXT_CTRL_FINAL 0x00000002 #define L2CAP_EXT_CTRL_FRAME_TYPE 0x00000001 /* I- or S-Frame */ #define L2CAP_EXT_CTRL_FINAL_SHIFT 1 #define L2CAP_EXT_CTRL_REQSEQ_SHIFT 2 #define L2CAP_EXT_CTRL_SAR_SHIFT 16 #define L2CAP_EXT_CTRL_SUPER_SHIFT 16 #define L2CAP_EXT_CTRL_POLL_SHIFT 18 #define L2CAP_EXT_CTRL_TXSEQ_SHIFT 18 /* L2CAP Supervisory Function */ #define L2CAP_SUPER_RR 0x00 #define L2CAP_SUPER_REJ 0x01 #define L2CAP_SUPER_RNR 0x02 #define L2CAP_SUPER_SREJ 0x03 /* L2CAP Segmentation and Reassembly */ #define L2CAP_SAR_UNSEGMENTED 0x00 #define L2CAP_SAR_START 0x01 #define L2CAP_SAR_END 0x02 #define L2CAP_SAR_CONTINUE 0x03 /* L2CAP Command rej. reasons */ #define L2CAP_REJ_NOT_UNDERSTOOD 0x0000 #define L2CAP_REJ_MTU_EXCEEDED 0x0001 #define L2CAP_REJ_INVALID_CID 0x0002 /* L2CAP structures */ struct l2cap_hdr { __le16 len; __le16 cid; } __packed; #define L2CAP_HDR_SIZE 4 #define L2CAP_ENH_HDR_SIZE 6 #define L2CAP_EXT_HDR_SIZE 8 #define L2CAP_FCS_SIZE 2 #define L2CAP_SDULEN_SIZE 2 #define L2CAP_PSMLEN_SIZE 2 #define L2CAP_ENH_CTRL_SIZE 2 #define L2CAP_EXT_CTRL_SIZE 4 struct l2cap_cmd_hdr { __u8 code; __u8 ident; __le16 len; } __packed; #define L2CAP_CMD_HDR_SIZE 4 struct l2cap_cmd_rej_unk { __le16 reason; } __packed; struct l2cap_cmd_rej_mtu { __le16 reason; __le16 max_mtu; } __packed; struct l2cap_cmd_rej_cid { __le16 reason; __le16 scid; __le16 dcid; } __packed; struct l2cap_conn_req { __le16 psm; __le16 scid; } __packed; struct l2cap_conn_rsp { __le16 dcid; __le16 scid; __le16 result; __le16 status; } __packed; /* protocol/service multiplexer (PSM) */ #define L2CAP_PSM_SDP 0x0001 #define L2CAP_PSM_RFCOMM 0x0003 #define L2CAP_PSM_3DSP 0x0021 #define L2CAP_PSM_IPSP 0x0023 /* 6LoWPAN */ #define L2CAP_PSM_DYN_START 0x1001 #define L2CAP_PSM_DYN_END 0xffff #define L2CAP_PSM_AUTO_END 0x10ff #define L2CAP_PSM_LE_DYN_START 0x0080 #define L2CAP_PSM_LE_DYN_END 0x00ff /* channel identifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 #define L2CAP_CID_A2MP 0x0003 #define L2CAP_CID_ATT 0x0004 #define L2CAP_CID_LE_SIGNALING 0x0005 #define L2CAP_CID_SMP 0x0006 #define L2CAP_CID_SMP_BREDR 0x0007 #define L2CAP_CID_DYN_START 0x0040 #define L2CAP_CID_DYN_END 0xffff #define L2CAP_CID_LE_DYN_END 0x007f /* connect/create channel results */ #define L2CAP_CR_SUCCESS 0x0000 #define L2CAP_CR_PEND 0x0001 #define L2CAP_CR_BAD_PSM 0x0002 #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 #define L2CAP_CR_INVALID_SCID 0x0006 #define L2CAP_CR_SCID_IN_USE 0x0007 /* credit based connect results */ #define L2CAP_CR_LE_SUCCESS 0x0000 #define L2CAP_CR_LE_BAD_PSM 0x0002 #define L2CAP_CR_LE_NO_MEM 0x0004 #define L2CAP_CR_LE_AUTHENTICATION 0x0005 #define L2CAP_CR_LE_AUTHORIZATION 0x0006 #define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 #define L2CAP_CR_LE_SCID_IN_USE 0X000A #define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B #define L2CAP_CR_LE_INVALID_PARAMS 0X000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 #define L2CAP_CS_AUTHEN_PEND 0x0001 #define L2CAP_CS_AUTHOR_PEND 0x0002 struct l2cap_conf_req { __le16 dcid; __le16 flags; __u8 data[]; } __packed; struct l2cap_conf_rsp { __le16 scid; __le16 flags; __le16 result; __u8 data[]; } __packed; #define L2CAP_CONF_SUCCESS 0x0000 #define L2CAP_CONF_UNACCEPT 0x0001 #define L2CAP_CONF_REJECT 0x0002 #define L2CAP_CONF_UNKNOWN 0x0003 #define L2CAP_CONF_PENDING 0x0004 #define L2CAP_CONF_EFS_REJECT 0x0005 /* configuration req/rsp continuation flag */ #define L2CAP_CONF_FLAG_CONTINUATION 0x0001 struct l2cap_conf_opt { __u8 type; __u8 len; __u8 val[]; } __packed; #define L2CAP_CONF_OPT_SIZE 2 #define L2CAP_CONF_HINT 0x80 #define L2CAP_CONF_MASK 0x7f #define L2CAP_CONF_MTU 0x01 #define L2CAP_CONF_FLUSH_TO 0x02 #define L2CAP_CONF_QOS 0x03 #define L2CAP_CONF_RFC 0x04 #define L2CAP_CONF_FCS 0x05 #define L2CAP_CONF_EFS 0x06 #define L2CAP_CONF_EWS 0x07 #define L2CAP_CONF_MAX_SIZE 22 struct l2cap_conf_rfc { __u8 mode; __u8 txwin_size; __u8 max_transmit; __le16 retrans_timeout; __le16 monitor_timeout; __le16 max_pdu_size; } __packed; #define L2CAP_MODE_BASIC 0x00 #define L2CAP_MODE_RETRANS 0x01 #define L2CAP_MODE_FLOWCTL 0x02 #define L2CAP_MODE_ERTM 0x03 #define L2CAP_MODE_STREAMING 0x04 /* Unlike the above this one doesn't actually map to anything that would * ever be sent over the air. Therefore, use a value that's unlikely to * ever be used in the BR/EDR configuration phase. */ #define L2CAP_MODE_LE_FLOWCTL 0x80 #define L2CAP_MODE_EXT_FLOWCTL 0x81 struct l2cap_conf_efs { __u8 id; __u8 stype; __le16 msdu; __le32 sdu_itime; __le32 acc_lat; __le32 flush_to; } __packed; #define L2CAP_SERV_NOTRAFIC 0x00 #define L2CAP_SERV_BESTEFFORT 0x01 #define L2CAP_SERV_GUARANTEED 0x02 #define L2CAP_BESTEFFORT_ID 0x01 struct l2cap_disconn_req { __le16 dcid; __le16 scid; } __packed; struct l2cap_disconn_rsp { __le16 dcid; __le16 scid; } __packed; struct l2cap_info_req { __le16 type; } __packed; struct l2cap_info_rsp { __le16 type; __le16 result; __u8 data[]; } __packed; struct l2cap_create_chan_req { __le16 psm; __le16 scid; __u8 amp_id; } __packed; struct l2cap_create_chan_rsp { __le16 dcid; __le16 scid; __le16 result; __le16 status; } __packed; struct l2cap_move_chan_req { __le16 icid; __u8 dest_amp_id; } __packed; struct l2cap_move_chan_rsp { __le16 icid; __le16 result; } __packed; #define L2CAP_MR_SUCCESS 0x0000 #define L2CAP_MR_PEND 0x0001 #define L2CAP_MR_BAD_ID 0x0002 #define L2CAP_MR_SAME_ID 0x0003 #define L2CAP_MR_NOT_SUPP 0x0004 #define L2CAP_MR_COLLISION 0x0005 #define L2CAP_MR_NOT_ALLOWED 0x0006 struct l2cap_move_chan_cfm { __le16 icid; __le16 result; } __packed; #define L2CAP_MC_CONFIRMED 0x0000 #define L2CAP_MC_UNCONFIRMED 0x0001 struct l2cap_move_chan_cfm_rsp { __le16 icid; } __packed; /* info type */ #define L2CAP_IT_CL_MTU 0x0001 #define L2CAP_IT_FEAT_MASK 0x0002 #define L2CAP_IT_FIXED_CHAN 0x0003 /* info result */ #define L2CAP_IR_SUCCESS 0x0000 #define L2CAP_IR_NOTSUPP 0x0001 struct l2cap_conn_param_update_req { __le16 min; __le16 max; __le16 latency; __le16 to_multiplier; } __packed; struct l2cap_conn_param_update_rsp { __le16 result; } __packed; /* Connection Parameters result */ #define L2CAP_CONN_PARAM_ACCEPTED 0x0000 #define L2CAP_CONN_PARAM_REJECTED 0x0001 struct l2cap_le_conn_req { __le16 psm; __le16 scid; __le16 mtu; __le16 mps; __le16 credits; } __packed; struct l2cap_le_conn_rsp { __le16 dcid; __le16 mtu; __le16 mps; __le16 credits; __le16 result; } __packed; struct l2cap_le_credits { __le16 cid; __le16 credits; } __packed; #define L2CAP_ECRED_MIN_MTU 64 #define L2CAP_ECRED_MIN_MPS 64 struct l2cap_ecred_conn_req { __le16 psm; __le16 mtu; __le16 mps; __le16 credits; __le16 scid[]; } __packed; struct l2cap_ecred_conn_rsp { __le16 mtu; __le16 mps; __le16 credits; __le16 result; __le16 dcid[]; }; struct l2cap_ecred_reconf_req { __le16 mtu; __le16 mps; __le16 scid[]; } __packed; #define L2CAP_RECONF_SUCCESS 0x0000 #define L2CAP_RECONF_INVALID_MTU 0x0001 #define L2CAP_RECONF_INVALID_MPS 0x0002 struct l2cap_ecred_reconf_rsp { __le16 result; } __packed; /* ----- L2CAP channels and connections ----- */ struct l2cap_seq_list { __u16 head; __u16 tail; __u16 mask; __u16 *list; }; #define L2CAP_SEQ_LIST_CLEAR 0xFFFF #define L2CAP_SEQ_LIST_TAIL 0x8000 struct l2cap_chan { struct l2cap_conn *conn; struct hci_conn *hs_hcon; struct hci_chan *hs_hchan; struct kref kref; atomic_t nesting; __u8 state; bdaddr_t dst; __u8 dst_type; bdaddr_t src; __u8 src_type; __le16 psm; __le16 sport; __u16 dcid; __u16 scid; __u16 imtu; __u16 omtu; __u16 flush_to; __u8 mode; __u8 chan_type; __u8 chan_policy; __u8 sec_level; __u8 ident; __u8 conf_req[64]; __u8 conf_len; __u8 num_conf_req; __u8 num_conf_rsp; __u8 fcs; __u16 tx_win; __u16 tx_win_max; __u16 ack_win; __u8 max_tx; __u16 retrans_timeout; __u16 monitor_timeout; __u16 mps; __u16 tx_credits; __u16 rx_credits; __u8 tx_state; __u8 rx_state; unsigned long conf_state; unsigned long conn_state; unsigned long flags; __u8 remote_amp_id; __u8 local_amp_id; __u8 move_id; __u8 move_state; __u8 move_role; __u16 next_tx_seq; __u16 expected_ack_seq; __u16 expected_tx_seq; __u16 buffer_seq; __u16 srej_save_reqseq; __u16 last_acked_seq; __u16 frames_sent; __u16 unacked_frames; __u8 retry_count; __u16 sdu_len; struct sk_buff *sdu; struct sk_buff *sdu_last_frag; __u16 remote_tx_win; __u8 remote_max_tx; __u16 remote_mps; __u8 local_id; __u8 local_stype; __u16 local_msdu; __u32 local_sdu_itime; __u32 local_acc_lat; __u32 local_flush_to; __u8 remote_id; __u8 remote_stype; __u16 remote_msdu; __u32 remote_sdu_itime; __u32 remote_acc_lat; __u32 remote_flush_to; struct delayed_work chan_timer; struct delayed_work retrans_timer; struct delayed_work monitor_timer; struct delayed_work ack_timer; struct sk_buff *tx_send_head; struct sk_buff_head tx_q; struct sk_buff_head srej_q; struct l2cap_seq_list srej_list; struct l2cap_seq_list retrans_list; struct list_head list; struct list_head global_l; void *data; const struct l2cap_ops *ops; struct mutex lock; }; struct l2cap_ops { char *name; struct l2cap_chan *(*new_connection) (struct l2cap_chan *chan); int (*recv) (struct l2cap_chan * chan, struct sk_buff *skb); void (*teardown) (struct l2cap_chan *chan, int err); void (*close) (struct l2cap_chan *chan); void (*state_change) (struct l2cap_chan *chan, int state, int err); void (*ready) (struct l2cap_chan *chan); void (*defer) (struct l2cap_chan *chan); void (*resume) (struct l2cap_chan *chan); void (*suspend) (struct l2cap_chan *chan); void (*set_shutdown) (struct l2cap_chan *chan); long (*get_sndtimeo) (struct l2cap_chan *chan); struct pid *(*get_peer_pid) (struct l2cap_chan *chan); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long hdr_len, unsigned long len, int nb); int (*filter) (struct l2cap_chan * chan, struct sk_buff *skb); }; struct l2cap_conn { struct hci_conn *hcon; struct hci_chan *hchan; unsigned int mtu; __u32 feat_mask; __u8 remote_fixed_chan; __u8 local_fixed_chan; __u8 info_state; __u8 info_ident; struct delayed_work info_timer; struct sk_buff *rx_skb; __u32 rx_len; __u8 tx_ident; struct mutex ident_lock; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; struct work_struct id_addr_update_work; __u8 disc_reason; struct l2cap_chan *smp; struct list_head chan_l; struct mutex chan_lock; struct kref ref; struct list_head users; }; struct l2cap_user { struct list_head list; int (*probe) (struct l2cap_conn *conn, struct l2cap_user *user); void (*remove) (struct l2cap_conn *conn, struct l2cap_user *user); }; #define L2CAP_INFO_CL_MTU_REQ_SENT 0x01 #define L2CAP_INFO_FEAT_MASK_REQ_SENT 0x04 #define L2CAP_INFO_FEAT_MASK_REQ_DONE 0x08 #define L2CAP_CHAN_RAW 1 #define L2CAP_CHAN_CONN_LESS 2 #define L2CAP_CHAN_CONN_ORIENTED 3 #define L2CAP_CHAN_FIXED 4 /* ----- L2CAP socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) struct l2cap_pinfo { struct bt_sock bt; struct l2cap_chan *chan; struct sk_buff *rx_busy_skb; }; enum { CONF_REQ_SENT, CONF_INPUT_DONE, CONF_OUTPUT_DONE, CONF_MTU_DONE, CONF_MODE_DONE, CONF_CONNECT_PEND, CONF_RECV_NO_FCS, CONF_STATE2_DEVICE, CONF_EWS_RECV, CONF_LOC_CONF_PEND, CONF_REM_CONF_PEND, CONF_NOT_COMPLETE, }; #define L2CAP_CONF_MAX_CONF_REQ 2 #define L2CAP_CONF_MAX_CONF_RSP 2 enum { CONN_SREJ_SENT, CONN_WAIT_F, CONN_SREJ_ACT, CONN_SEND_PBIT, CONN_REMOTE_BUSY, CONN_LOCAL_BUSY, CONN_REJ_ACT, CONN_SEND_FBIT, CONN_RNR_SENT, }; /* Definitions for flags in l2cap_chan */ enum { FLAG_ROLE_SWITCH, FLAG_FORCE_ACTIVE, FLAG_FORCE_RELIABLE, FLAG_FLUSHABLE, FLAG_EXT_CTRL, FLAG_EFS_ENABLE, FLAG_DEFER_SETUP, FLAG_LE_CONN_REQ_SENT, FLAG_ECRED_CONN_REQ_SENT, FLAG_PENDING_SECURITY, FLAG_HOLD_HCI_CONN, }; /* Lock nesting levels for L2CAP channels. We need these because lockdep * otherwise considers all channels equal and will e.g. complain about a * connection oriented channel triggering SMP procedures or a listening * channel creating and locking a child channel. */ enum { L2CAP_NESTING_SMP, L2CAP_NESTING_NORMAL, L2CAP_NESTING_PARENT, }; enum { L2CAP_TX_STATE_XMIT, L2CAP_TX_STATE_WAIT_F, }; enum { L2CAP_RX_STATE_RECV, L2CAP_RX_STATE_SREJ_SENT, L2CAP_RX_STATE_MOVE, L2CAP_RX_STATE_WAIT_P, L2CAP_RX_STATE_WAIT_F, }; enum { L2CAP_TXSEQ_EXPECTED, L2CAP_TXSEQ_EXPECTED_SREJ, L2CAP_TXSEQ_UNEXPECTED, L2CAP_TXSEQ_UNEXPECTED_SREJ, L2CAP_TXSEQ_DUPLICATE, L2CAP_TXSEQ_DUPLICATE_SREJ, L2CAP_TXSEQ_INVALID, L2CAP_TXSEQ_INVALID_IGNORE, }; enum { L2CAP_EV_DATA_REQUEST, L2CAP_EV_LOCAL_BUSY_DETECTED, L2CAP_EV_LOCAL_BUSY_CLEAR, L2CAP_EV_RECV_REQSEQ_AND_FBIT, L2CAP_EV_RECV_FBIT, L2CAP_EV_RETRANS_TO, L2CAP_EV_MONITOR_TO, L2CAP_EV_EXPLICIT_POLL, L2CAP_EV_RECV_IFRAME, L2CAP_EV_RECV_RR, L2CAP_EV_RECV_REJ, L2CAP_EV_RECV_RNR, L2CAP_EV_RECV_SREJ, L2CAP_EV_RECV_FRAME, }; enum { L2CAP_MOVE_ROLE_NONE, L2CAP_MOVE_ROLE_INITIATOR, L2CAP_MOVE_ROLE_RESPONDER, }; enum { L2CAP_MOVE_STABLE, L2CAP_MOVE_WAIT_REQ, L2CAP_MOVE_WAIT_RSP, L2CAP_MOVE_WAIT_RSP_SUCCESS, L2CAP_MOVE_WAIT_CONFIRM, L2CAP_MOVE_WAIT_CONFIRM_RSP, L2CAP_MOVE_WAIT_LOGICAL_COMP, L2CAP_MOVE_WAIT_LOGICAL_CFM, L2CAP_MOVE_WAIT_LOCAL_BUSY, L2CAP_MOVE_WAIT_PREPARE, }; void l2cap_chan_hold(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) { mutex_lock_nested(&chan->lock, atomic_read(&chan->nesting)); } static inline void l2cap_chan_unlock(struct l2cap_chan *chan) { mutex_unlock(&chan->lock); } static inline void l2cap_set_timer(struct l2cap_chan *chan, struct delayed_work *work, long timeout) { BT_DBG("chan %p state %s timeout %ld", chan, state_to_string(chan->state), timeout); /* If delayed work cancelled do not hold(chan) since it is already done with previous set_timer */ if (!cancel_delayed_work(work)) l2cap_chan_hold(chan); schedule_delayed_work(work, timeout); } static inline bool l2cap_clear_timer(struct l2cap_chan *chan, struct delayed_work *work) { bool ret; /* put(chan) if delayed work cancelled otherwise it is done in delayed work function */ ret = cancel_delayed_work(work); if (ret) l2cap_chan_put(chan); return ret; } #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); #define __clear_ack_timer(c) l2cap_clear_timer(c, &c->ack_timer) static inline int __seq_offset(struct l2cap_chan *chan, __u16 seq1, __u16 seq2) { if (seq1 >= seq2) return seq1 - seq2; else return chan->tx_win_max + 1 - seq2 + seq1; } static inline __u16 __next_seq(struct l2cap_chan *chan, __u16 seq) { return (seq + 1) % (chan->tx_win_max + 1); } static inline struct l2cap_chan *l2cap_chan_no_new_connection(struct l2cap_chan *chan) { return NULL; } static inline int l2cap_chan_no_recv(struct l2cap_chan *chan, struct sk_buff *skb) { return -ENOSYS; } static inline struct sk_buff *l2cap_chan_no_alloc_skb(struct l2cap_chan *chan, unsigned long hdr_len, unsigned long len, int nb) { return ERR_PTR(-ENOSYS); } static inline void l2cap_chan_no_teardown(struct l2cap_chan *chan, int err) { } static inline void l2cap_chan_no_close(struct l2cap_chan *chan) { } static inline void l2cap_chan_no_ready(struct l2cap_chan *chan) { } static inline void l2cap_chan_no_state_change(struct l2cap_chan *chan, int state, int err) { } static inline void l2cap_chan_no_defer(struct l2cap_chan *chan) { } static inline void l2cap_chan_no_suspend(struct l2cap_chan *chan) { } static inline void l2cap_chan_no_resume(struct l2cap_chan *chan) { } static inline void l2cap_chan_no_set_shutdown(struct l2cap_chan *chan) { } static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan) { return 0; } extern bool disable_ertm; extern bool enable_ecred; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); bool l2cap_is_socket(struct socket *sock); void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan); void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm); int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type); int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator); void l2cap_chan_set_defaults(struct l2cap_chan *chan); int l2cap_ertm_init(struct l2cap_chan *chan); void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); typedef void (*l2cap_chan_func_t)(struct l2cap_chan *chan, void *data); void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data); void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); void l2cap_move_start(struct l2cap_chan *chan); void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, u8 status); void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user); #endif /* __L2CAP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_COMPAT_H #define _ASM_X86_COMPAT_H /* * Architecture specific compatibility types */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/user32.h> #include <asm/unistd.h> #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u32 __compat_uid32_t; typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { compat_dev_t st_dev; u16 __pad1; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; u32 st_blksize; u32 st_blocks; u32 st_atime; u32 st_atime_nsec; u32 st_mtime; u32 st_mtime_nsec; u32 st_ctime; u32 st_ctime_nsec; u32 __unused4; u32 __unused5; }; struct compat_flock { short l_type; short l_whence; compat_off_t l_start; compat_off_t l_len; compat_pid_t l_pid; }; #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 /* * IA32 uses 4 byte alignment for 64 bit quantities, * so we need to pack this structure. */ struct compat_flock64 { short l_type; short l_whence; compat_loff_t l_start; compat_loff_t l_len; compat_pid_t l_pid; } __attribute__((packed)); struct compat_statfs { int f_type; int f_bsize; int f_blocks; int f_bfree; int f_bavail; int f_files; int f_ffree; compat_fsid_t f_fsid; int f_namelen; /* SunOS ignores this field. */ int f_frsize; int f_flags; int f_spare[4]; }; #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ #define _COMPAT_NSIG 64 #define _COMPAT_NSIG_BPW 32 typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; __compat_gid32_t gid; __compat_uid32_t cuid; __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; unsigned short __pad2; compat_ulong_t unused1; compat_ulong_t unused2; }; struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; compat_ulong_t sem_otime; compat_ulong_t sem_otime_high; compat_ulong_t sem_ctime; compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; }; struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; compat_ulong_t msg_stime; compat_ulong_t msg_stime_high; compat_ulong_t msg_rtime; compat_ulong_t msg_rtime_high; compat_ulong_t msg_ctime; compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; compat_pid_t msg_lspid; compat_pid_t msg_lrpid; compat_ulong_t __unused4; compat_ulong_t __unused5; }; struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; compat_ulong_t shm_atime; compat_ulong_t shm_atime_high; compat_ulong_t shm_dtime; compat_ulong_t shm_dtime_high; compat_ulong_t shm_ctime; compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; compat_ulong_t __unused4; compat_ulong_t __unused5; }; /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ typedef struct user_regs_struct compat_elf_gregset_t; /* Full regset -- prstatus on x32, otherwise on ia32 */ #define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296) #define SET_PR_FPVALID(S, V, R) \ do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif static inline void __user *arch_compat_alloc_user_space(long len) { compat_uptr_t sp; if (test_thread_flag(TIF_IA32)) { sp = task_pt_regs(current)->sp; } else { /* -128 for the x32 ABI redzone */ sp = task_pt_regs(current)->sp - 128; } return (void __user *)round_down(sp - len, 16); } static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) return true; #endif return false; } static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } #ifdef CONFIG_COMPAT static inline bool in_compat_syscall(void) { return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ #define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; #ifdef CONFIG_X86_X32_ABI int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); #define copy_siginfo_to_user32 copy_siginfo_to_user32 #endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 /* SPDX-License-Identifier: GPL-2.0 */ /* interrupt.h */ #ifndef _LINUX_INTERRUPT_H #define _LINUX_INTERRUPT_H #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> #include <linux/irqreturn.h> #include <linux/irqnr.h> #include <linux/hardirq.h> #include <linux/irqflags.h> #include <linux/hrtimer.h> #include <linux/kref.h> #include <linux/workqueue.h> #include <linux/atomic.h> #include <asm/ptrace.h> #include <asm/irq.h> #include <asm/sections.h> /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When * requesting an interrupt without specifying a IRQF_TRIGGER, the * setting should be assumed to be "as already configured", which * may be as per machine or firmware initialisation. */ #define IRQF_TRIGGER_NONE 0x00000000 #define IRQF_TRIGGER_RISING 0x00000001 #define IRQF_TRIGGER_FALLING 0x00000002 #define IRQF_TRIGGER_HIGH 0x00000004 #define IRQF_TRIGGER_LOW 0x00000008 #define IRQF_TRIGGER_MASK (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | \ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING) #define IRQF_TRIGGER_PROBE 0x00000010 /* * These flags used only by the kernel as part of the * irq handling routines. * * IRQF_SHARED - allow sharing the irq among several devices * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur * IRQF_TIMER - Flag to mark this interrupt as timer interrupt * IRQF_PERCPU - Interrupt is per cpu * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in a shared interrupt is considered for * performance reasons) * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee * that this interrupt will wake the system from a suspended * state. See Documentation/power/suspend-and-interrupts.rst * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 #define __IRQF_TIMER 0x00000200 #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 #define IRQF_ONESHOT 0x00002000 #define IRQF_NO_SUSPEND 0x00004000 #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) /* * These values can be returned by request_any_context_irq() and * describe the context the interrupt will be run in. * * IRQC_IS_HARDIRQ - interrupt runs in hardirq context * IRQC_IS_NESTED - interrupt runs in a nested threaded context */ enum { IRQC_IS_HARDIRQ = 0, IRQC_IS_NESTED, }; typedef irqreturn_t (*irq_handler_t)(int, void *); /** * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @name: name of the device * @dev_id: cookie to identify the device * @percpu_dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts * @irq: interrupt number * @flags: flags (see IRQF_* above) * @thread_fn: interrupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts * @secondary: pointer to secondary irqaction (force threading) * @thread_flags: flags related to @thread * @thread_mask: bitmask for keeping track of @thread activity * @dir: pointer to the proc/irq/NN/name entry */ struct irqaction { irq_handler_t handler; void *dev_id; void __percpu *percpu_dev_id; struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; struct irqaction *secondary; unsigned int irq; unsigned int flags; unsigned long thread_flags; unsigned long thread_mask; const char *name; struct proc_dir_entry *dir; } ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we * can distingiush that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. */ #define IRQ_NOTCONNECTED (1U << 31) extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags, const char *name, void *dev); /** * request_irq - Add a handler for an interrupt line * @irq: The interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts * If NULL, the default primary handler is installed * @flags: Handling flags * @name: Name of the device generating this interrupt * @dev: A cookie passed to the handler function * * This call allocates an interrupt and establishes a handler; see * the documentation for request_threaded_irq() for details. */ static inline int __must_check request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev) { return request_threaded_irq(irq, handler, NULL, flags, name, dev); } extern int __must_check request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev); static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { return __request_percpu_irq(irq, handler, 0, devname, percpu_dev_id); } extern int __must_check request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *dev); extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); extern const void *free_nmi(unsigned int irq, void *dev_id); extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); struct device; extern int __must_check devm_request_threaded_irq(struct device *dev, unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, const char *devname, void *dev_id); static inline int __must_check devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, devname, dev_id); } extern int __must_check devm_request_any_context_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); /* * On lockdep we dont want to enable hardirqs in hardirq * context. Use local_irq_enable_in_hardirq() to annotate * kernel code that has to do this nevertheless (pretty much * the only valid case is for old/broken hardware that is * insanely slow). * * NOTE: in theory this might break fragile code that relies * on hardirq delivery - in practice we dont seem to have such * places left. So the only effect should be slightly increased * irqs-off latencies. */ #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else # define local_irq_enable_in_hardirq() local_irq_enable() #endif extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); extern void enable_percpu_nmi(unsigned int irq, unsigned int type); extern int prepare_percpu_nmi(unsigned int irq); extern void teardown_percpu_nmi(unsigned int irq); extern int irq_inject_interrupt(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); extern void rearm_wake_irq(unsigned int irq); /** * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. * @release: Function to be called on release. This will be * called in process context. Once registered, the * structure must only be freed when this function is * called or later. */ struct irq_affinity_notify { unsigned int irq; struct kref kref; struct work_struct work; void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; #define IRQ_AFFINITY_MAX_SETS 4 /** * struct irq_affinity - Description for automatic irq affinity assignements * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space * @nr_sets: The number of interrupt sets for which affinity * spreading is required * @set_size: Array holding the size of each interrupt set * @calc_sets: Callback for calculating the number and size * of interrupt sets * @priv: Private data for usage by @calc_sets, usually a * pointer to driver/device specific data. */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); void *priv; }; /** * struct irq_affinity_desc - Interrupt affinity descriptor * @mask: cpumask to hold the affinity assignment * @is_managed: 1 if the interrupt is managed internally */ struct irq_affinity_desc { struct cpumask mask; unsigned int is_managed : 1; }; #if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; /* Internal implementation. Use the helpers below */ extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, bool force); /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity * @cpumask: cpumask * * Fails if cpumask does not contain an online CPU */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { return __irq_set_affinity(irq, cpumask, false); } /** * irq_force_affinity - Force the irq affinity of a given irq * @irq: Interrupt to set affinity * @cpumask: cpumask * * Same as irq_set_affinity, but without checking the mask against * online cpus. * * Solely for low level cpu hotplug code, where we need to make per * cpu interrupts affine before the cpu becomes online. */ static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) { return __irq_set_affinity(irq, cpumask, true); } extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd); unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) { return 0; } static inline int irq_can_set_affinity(unsigned int irq) { return 0; } static inline int irq_select_affinity(unsigned int irq) { return 0; } static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { return 0; } static inline struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd) { return NULL; } static inline unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd) { return maxvec; } #endif /* CONFIG_SMP */ /* * Special lockdep variants of irq disabling/enabling. * These should be used for locking constructs that * know that a particular irq context which is disabled, * and which is the only irq-context user of a lock, * that it's safe to take the lock in the irq-disabled * section without disabling hardirqs. * * On !CONFIG_LOCKDEP they are equivalent to the normal * irq disable/enable methods. */ static inline void disable_irq_nosync_lockdep(unsigned int irq) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_save(*flags); #endif } static inline void disable_irq_lockdep(unsigned int irq) { disable_irq(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void enable_irq_lockdep(unsigned int irq) { #ifdef CONFIG_LOCKDEP local_irq_enable(); #endif enable_irq(irq); } static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) { #ifdef CONFIG_LOCKDEP local_irq_restore(*flags); #endif enable_irq(irq); } /* IRQ wakeup (PM) control: */ extern int irq_set_irq_wake(unsigned int irq, unsigned int on); static inline int enable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 1); } static inline int disable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 0); } /* * irq_get_irqchip_state/irq_set_irqchip_state specific flags */ enum irqchip_irq_state { IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? */ }; extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state); extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool state); #ifdef CONFIG_IRQ_FORCED_THREADING # ifdef CONFIG_PREEMPT_RT # define force_irqthreads (true) # else extern bool force_irqthreads; # endif #else #define force_irqthreads (0) #endif #ifndef local_softirq_pending #ifndef local_softirq_pending_ref #define local_softirq_pending_ref irq_stat.__softirq_pending #endif #define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref)) #define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x))) #define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x))) #endif /* local_softirq_pending */ /* Some architectures might implement lazy enabling/disabling of * interrupts. In some cases, such as stop_machine, we might want * to ensure that after a local_irq_disable(), interrupts have * really been disabled in hardware. Such architectures need to * implement the following hook. */ #ifndef hard_irq_disable #define hard_irq_disable() do { } while(0) #endif /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes tasklets are more than enough. F.e. all serial device BHs et al. should be converted to tasklets, not to softirqs. */ enum { HI_SOFTIRQ=0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; #define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. */ extern const char * const softirq_to_name[NR_SOFTIRQS]; /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ struct softirq_action { void (*action)(struct softirq_action *); }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); #ifdef __ARCH_HAS_DO_SOFTIRQ void do_softirq_own_stack(void); #else static inline void do_softirq_own_stack(void) { __do_softirq(); } #endif extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) { return this_cpu_read(ksoftirqd); } /* Tasklets --- multithreaded analogue of BHs. This API is deprecated. Please consider using threaded IRQs instead: https://lore.kernel.org/lkml/20200716081538.2sivhkj4hcyrusem@linutronix.de Main feature differing them of generic softirqs: tasklet is running only on one CPU simultaneously. Main feature differing them of BHs: different tasklets may be run simultaneously on different CPUs. Properties: * If tasklet_schedule() is called, then tasklet is guaranteed to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. * If this tasklet is already running on another CPU (or schedule is called from tasklet itself), it is rescheduled for later. * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. */ struct tasklet_struct { struct tasklet_struct *next; unsigned long state; atomic_t count; bool use_callback; union { void (*func)(unsigned long data); void (*callback)(struct tasklet_struct *t); }; unsigned long data; }; #define DECLARE_TASKLET(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .callback = _callback, \ .use_callback = true, \ } #define DECLARE_TASKLET_DISABLED(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .callback = _callback, \ .use_callback = true, \ } #define from_tasklet(var, callback_tasklet, tasklet_fieldname) \ container_of(callback_tasklet, typeof(*var), tasklet_fieldname) #define DECLARE_TASKLET_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .func = _func, \ } #define DECLARE_TASKLET_DISABLED_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .func = _func, \ } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; #ifdef CONFIG_SMP static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } static inline void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_atomic(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } static inline void tasklet_unlock_wait(struct tasklet_struct *t) { while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } } #else #define tasklet_trylock(t) 1 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_schedule(t); } extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_hi_schedule(t); } static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); smp_mb__after_atomic(); } static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); tasklet_unlock_wait(t); smp_mb(); } static inline void tasklet_enable(struct tasklet_struct *t) { smp_mb__before_atomic(); atomic_dec(&t->count); } extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, void (*callback)(struct tasklet_struct *)); /* * Autoprobing for irqs: * * probe_irq_on() and probe_irq_off() provide robust primitives * for accurate IRQ probing during kernel initialization. They are * reasonably simple to use, are not "fooled" by spurious interrupts, * and, unlike other attempts at IRQ probing, they do not get hung on * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). * * For reasonably foolproof probing, use them as follows: * * 1. clear and/or mask the device's internal interrupt. * 2. sti(); * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs * 4. enable the device and cause it to trigger an interrupt. * 5. wait for the device to interrupt, using non-intrusive polling or a delay. * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple * 7. service the device to clear its pending interrupt. * 8. loop again if paranoia is required. * * probe_irq_on() returns a mask of allocated irq's. * * probe_irq_off() takes the mask as a parameter, * and returns the irq number which occurred, * or zero if none occurred, or a negative irq number * if more than one irq occurred. */ #if !defined(CONFIG_GENERIC_IRQ_PROBE) static inline unsigned long probe_irq_on(void) { return 0; } static inline int probe_irq_off(unsigned long val) { return 0; } static inline unsigned int probe_irq_mask(unsigned long val) { return 0; } #else extern unsigned long probe_irq_on(void); /* returns 0 on failure */ extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ #endif #ifdef CONFIG_PROC_FS /* Initialize /proc/irq/ */ extern void init_irq_proc(void); #else static inline void init_irq_proc(void) { } #endif #ifdef CONFIG_IRQ_TIMINGS void irq_timings_enable(void); void irq_timings_disable(void); u64 irq_timings_next_event(u64 now); #endif struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ #ifndef __irq_entry # define __irq_entry __section(".irqentry.text") #endif #define __softirq_entry __section(".softirqentry.text") #endif
1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H #include <linux/mem_encrypt.h> #include <asm/page.h> #include <asm/pgtable_types.h> /* * Macro to mark a page protection value as UC- */ #define pgprot_noncached(prot) \ ((boot_cpu_data.x86 > 3) \ ? (__pgprot(pgprot_val(prot) | \ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) /* * Macros to add or remove encryption attribute */ #define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) #define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) #ifndef __ASSEMBLY__ #include <asm/x86_init.h> #include <asm/fpu/xstate.h> #include <asm/fpu/api.h> #include <asm-generic/pgtable_uffd.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else #define debug_checkwx() do { } while (0) #define debug_checkwx_user() do { } while (0) #endif /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; #define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) #define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) #define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d # define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d) #endif #ifndef __PAGETABLE_PUD_FOLDED #define p4d_clear(p4d) native_p4d_clear(p4d) #endif #ifndef set_pud # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) #ifndef __PAGETABLE_P4D_FOLDED #define p4d_val(x) native_p4d_val(x) #define __p4d(x) native_make_p4d(x) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_val(x) native_pud_val(x) #define __pud(x) native_make_pud(x) #endif #ifndef __PAGETABLE_PMD_FOLDED #define pmd_val(x) native_pmd_val(x) #define __pmd(x) native_make_pmd(x) #endif #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ static inline int pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY; } static inline u32 read_pkru(void) { if (boot_cpu_has(X86_FEATURE_OSPKE)) return rdpkru(); return 0; } static inline void write_pkru(u32 pkru) { struct pkru_state *pk; if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU); /* * The PKRU value in xstate needs to be in sync with the value that is * written to the CPU. The FPU restore on return to userland would * otherwise load the previous value again. */ fpregs_lock(); if (pk) pk->pkru = pkru; __write_pkru(pkru); fpregs_unlock(); } static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; } static inline int pmd_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_DIRTY; } static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } static inline int pud_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_DIRTY; } static inline int pud_young(pud_t pud) { return pud_flags(pud) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_flags(pte) & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) { return pte_flags(pte) & _PAGE_SPECIAL; } /* Entries that were set to PROT_NONE are inverted */ static inline u64 protnone_mask(u64 val); static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) { return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; } static inline unsigned long pgd_pfn(pgd_t pgd) { return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } #define p4d_leaf p4d_large static inline int p4d_large(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_large static inline int pmd_large(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #endif #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return boot_cpu_has(X86_FEATURE_PSE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_devmap(pud_t pud) { return !!(pud_val(pud) & _PAGE_DEVMAP); } #else static inline int pud_devmap(pud_t pud) { return 0; } #endif static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) { pteval_t v = native_pte_val(pte); return native_make_pte(v | set); } static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) { pteval_t v = native_pte_val(pte); return native_make_pte(v & ~clear); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_UFFD_WP; } static inline pte_t pte_mkuffd_wp(pte_t pte) { return pte_set_flags(pte, _PAGE_UFFD_WP); } static inline pte_t pte_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { return pte_clear_flags(pte, _PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL); } static inline pte_t pte_mkdevmap(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); } static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v & ~clear); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_UFFD_WP; } static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_UFFD_WP); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkclean(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_RW); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mkdevmap(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DEVMAP); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); return native_make_pud(v & ~clear); } static inline pud_t pud_mkold(pud_t pud) { return pud_clear_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkclean(pud_t pud) { return pud_clear_flags(pud, _PAGE_DIRTY); } static inline pud_t pud_wrprotect(pud_t pud) { return pud_clear_flags(pud, _PAGE_RW); } static inline pud_t pud_mkdirty(pud_t pud) { return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pud_t pud_mkdevmap(pud_t pud) { return pud_set_flags(pud, _PAGE_DEVMAP); } static inline pud_t pud_mkhuge(pud_t pud) { return pud_set_flags(pud, _PAGE_PSE); } static inline pud_t pud_mkyoung(pud_t pud) { return pud_set_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkwrite(pud_t pud) { return pud_set_flags(pud, _PAGE_RW); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SOFT_DIRTY; } static inline int pmd_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; } static inline int pud_soft_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_SOFT_DIRTY; } static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_mksoft_dirty(pud_t pud) { return pud_set_flags(pud, _PAGE_SOFT_DIRTY); } static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_clear_soft_dirty(pud_t pud) { return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. */ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) { pgprotval_t protval = pgprot_val(pgprot); if (protval & _PAGE_PRESENT) protval &= __supported_pte_mask; return protval; } static inline pgprotval_t check_pgprot(pgprot_t pgprot) { pgprotval_t massaged_val = massage_pgprot(pgprot); /* mmdebug.h can not be included here because of dependencies */ #ifdef CONFIG_DEBUG_VM WARN_ONCE(pgprot_val(pgprot) != massaged_val, "attempted to set unsupported pgprot: %016llx " "bits: %016llx supported: %016llx\n", (u64)pgprot_val(pgprot), (u64)pgprot_val(pgprot) ^ massaged_val, (u64)__supported_pte_mask); #endif return massaged_val; } static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | check_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PUD_PAGE_MASK; return __pud(pfn | check_pgprot(pgprot)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; /* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present): */ val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); return __pte(val); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; val &= _HPAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); return __pmd(val); } /* * mprotect needs to preserve PAT and encryption bits when updating * vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } #define pte_pgprot(x) __pgprot(pte_flags(x)) #define pmd_pgprot(x) __pgprot(pmd_flags(x)) #define pud_pgprot(x) __pgprot(pud_flags(x)) #define p4d_pgprot(x) __pgprot(p4d_flags(x)) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline pgprot_t arch_filter_pgprot(pgprot_t prot) { return canon_pgprot(prot); } static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) { /* * PAT type is always WB for untracked ranges, so no need to check. */ if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) return 1; /* * Certain new memtypes are not allowed with certain * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back * - request is write-through, return cannot be write-back * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } return 1; } pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* * Take a PGD location (pgdp) and a pgd value that needs to be set there. * Populates the user and returns the resulting PGD that must be set in * the kernel copy of the page tables. */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { if (!static_cpu_has(X86_FEATURE_PTI)) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } #else /* CONFIG_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } #endif /* CONFIG_PAGE_TABLE_ISOLATION */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else # include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> #include <asm/fixmap.h> static inline int pte_none(pte_t pte) { return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte == b.pte; } static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; } #endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { if (pte_flags(a) & _PAGE_PRESENT) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && mm_tlb_flush_pending(mm)) return true; return false; } static inline int pmd_present(pmd_t pmd) { /* * Checking for _PAGE_PSE is needed too because * split_huge_page will temporarily clear the present bit (but * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). */ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } static inline int pmd_protnone(pmd_t pmd) { return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } #endif /* CONFIG_NUMA_BALANCING */ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ unsigned long val = native_pmd_val(pmd); return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * * (Currently stuck as a macro because of indirect forward reference * to linux/mm.h:page_to_nid()) */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; } static inline unsigned long pages_to_mb(unsigned long npg) { return npg >> (20 - PAGE_SHIFT); } #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) { return pud_flags(pud) & _PAGE_PRESENT; } static inline unsigned long pud_page_vaddr(pud_t pud) { return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) #define pud_leaf pud_large static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #else #define pud_leaf pud_large static inline int pud_large(pud_t pud) { return 0; } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int p4d_present(p4d_t p4d) { return p4d_flags(p4d) & _PAGE_PRESENT; } static inline unsigned long p4d_page_vaddr(p4d_t p4d) { return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ static inline unsigned long p4d_index(unsigned long address) { return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1); } #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { if (!pgtable_l5_enabled()) return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; if (!pgtable_l5_enabled()) return 0; if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) { if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables * except on 32-bit non-PAE which is not supported on * KNL. */ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* __ASSEMBLY__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) #ifndef __ASSEMBLY__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 extern pgd_t trampoline_pgd_entry; #endif /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { pte_t res = *ptep; /* Pure native function needs no input for mm, addr */ native_pte_clear(NULL, 0, ptep); return res; } static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) { pmd_t res = *pmdp; native_pmd_clear(pmdp); return res; } static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) { pud_t res = *pudp; native_pud_clear(pudp); return res; } static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { set_pte(ptep, pte); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { native_set_pud(pudp, pud); } /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time. */ struct vm_area_struct; #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH extern int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); return pte; } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; if (full) { /* * Full address destruction in progress; paravirt does not * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); } #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); extern int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty); #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); extern int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp); #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_RW; } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { return native_pmdp_get_and_clear(pmdp); } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { return native_pudp_get_and_clear(pudp); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); } #define pud_write pud_write static inline int pud_write(pud_t pud) { return pud_flags(pud) & _PAGE_RW; } #ifndef pmdp_establish #define pmdp_establish pmdp_establish static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; WRITE_ONCE(*pmdp, pmd); return old; } } #endif /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. * * Returns true for parts of the PGD that map userspace and * false for the parts that map the kernel. */ static inline bool pgdp_maps_userspace(void *__ptr) { unsigned long ptr = (unsigned long)__ptr; return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } #define pgd_leaf pgd_large static inline int pgd_large(pgd_t pgd) { return 0; } #ifdef CONFIG_PAGE_TABLE_ISOLATION /* * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. */ #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT /* * This generates better code than the inline assembly in * __set_bit(). */ static inline void *ptr_set_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr |= BIT(bit); return (void *)__ptr; } static inline void *ptr_clear_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr &= ~BIT(bit); return (void *)__ptr; } static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) { return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) { return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) { return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } #endif /* CONFIG_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * * dst - pointer to pgd range anwhere on a pgd page * src - "" * count - the number of pgds to copy. * * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); #endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) static inline int page_level_shift(enum pg_level level) { return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; } static inline unsigned long page_level_size(enum pg_level level) { return 1UL << page_level_shift(level); } static inline unsigned long page_level_mask(enum pg_level level) { return ~(page_level_size(level) - 1); } /* * The x86 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); } static inline int pte_swp_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; } static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } #endif #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); } static inline int pte_swp_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_UFFD_WP; } static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } static inline int pmd_swp_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; } static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define PKRU_AD_BIT 0x1u #define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS extern u32 init_pkru_value; #else #define init_pkru_value 0 #endif static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); } static inline bool __pkru_allows_write(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; /* * Access-disable disables writes too so we need to check * both bits here. */ return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); } static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* ifdef to avoid doing 59-bit shift on 32-bit values */ return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; #else return 0; #endif } static inline bool __pkru_allows_pkey(u16 pkey, bool write) { u32 pkru = read_pkru(); if (!__pkru_allows_read(pkru, pkey)) return false; if (write && !__pkru_allows_write(pkru, pkey)) return false; return true; } /* * 'pteval' can come from a PTE, PMD or PUD. We only check * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the * same value on all 3 types. */ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; if (write) need_pte_bits |= _PAGE_RW; if ((pteval & need_pte_bits) != need_pte_bits) return 0; return __pkru_allows_pkey(pte_flags_pkey(pteval), write); } #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { return __pte_access_permitted(pte_val(pte), write); } #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { return __pte_access_permitted(pmd_val(pmd), write); } #define pud_access_permitted pud_access_permitted static inline bool pud_access_permitted(pud_t pud, bool write) { return __pte_access_permitted(pud_val(pud), write); } #define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); static inline bool arch_has_pfn_modify_check(void) { return boot_cpu_has_bug(X86_BUG_L1TF); } #define arch_faults_on_old_pte arch_faults_on_old_pte static inline bool arch_faults_on_old_pte(void) { return false; } #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/nmi.h */ #ifndef LINUX_NMI_H #define LINUX_NMI_H #include <linux/sched.h> #include <asm/irq.h> #if defined(CONFIG_HAVE_NMI_WATCHDOG) #include <asm/nmi.h> #endif #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); bool is_hardlockup(void); extern int watchdog_user_enabled; extern int nmi_watchdog_user_enabled; extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; #else #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif /* !CONFIG_SMP */ #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; extern int lockup_detector_online_cpu(unsigned int cpu); extern int lockup_detector_offline_cpu(unsigned int cpu); #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline void touch_softlockup_watchdog_sched(void) { } static inline void touch_softlockup_watchdog(void) { } static inline void touch_softlockup_watchdog_sync(void) { } static inline void touch_all_softlockup_watchdogs(void) { } #define lockup_detector_online_cpu NULL #define lockup_detector_offline_cpu NULL #endif /* CONFIG_SOFTLOCKUP_DETECTOR */ #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else static inline void reset_hung_task_detector(void) { } #endif /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and * 'soft_watchdog_user_enabled' are variables that are only used as an * 'interface' between the parameters in /proc/sys/kernel and the internal * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is * handled differently because its value is not boolean, and the lockup * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) # define NMI_WATCHDOG_SYSCTL_PERM 0644 #else # define NMI_WATCHDOG_SYSCTL_PERM 0444 #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif #endif void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ static inline void touch_nmi_watchdog(void) { arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } /* * Create trigger_all_cpu_backtrace() out of the arch-provided * base function. Return whether such support was available, * to allow calling code to fall back to some other mechanism: */ #ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, false); return true; } static inline bool trigger_allbutself_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, true); return true; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { arch_trigger_cpumask_backtrace(mask, false); return true; } static inline bool trigger_single_cpu_backtrace(int cpu) { arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); return true; } /* generic implementation */ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); #else static inline bool trigger_all_cpu_backtrace(void) { return false; } static inline bool trigger_allbutself_cpu_backtrace(void) { return false; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { return false; } static inline bool trigger_single_cpu_backtrace(int cpu) { return false; } #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 #ifndef __ASSEMBLY__ /* * A clear pte value is special, and doesn't get inverted. * * Note that even users that only pass a pgprot_t (rather * than a full pte) won't trigger the special zero case, * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED * set. So the all zero case really is limited to just the * cleared page table entry case. */ static inline bool __pte_needs_invert(u64 val) { return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ static inline u64 protnone_mask(u64 val) { return __pte_needs_invert(val) ? ~0ull : 0; } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) { /* * When a PTE transitions from NONE to !NONE or vice-versa * invert the PFN part to stop speculation. * pte_pfn undoes this when needed. */ if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) val = (val & ~mask) | (~val & mask); return val; } #endif /* __ASSEMBLY__ */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 /* SPDX-License-Identifier: GPL-2.0 */ /* * Wrapper functions for accessing the file_struct fd array. */ #ifndef __LINUX_FILE_H #define __LINUX_FILE_H #include <linux/compiler.h> #include <linux/types.h> #include <linux/posix_types.h> #include <linux/errno.h> struct file; extern void fput(struct file *); extern void fput_many(struct file *, unsigned int); struct file_operations; struct task_struct; struct vfsmount; struct dentry; struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { if (fput_needed) fput(file); } struct fd { struct file *file; unsigned int flags; }; #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 static inline void fdput(struct fd fd) { if (fd.flags & FDPUT_FPUT) fput(fd.file); } extern struct file *fget(unsigned int fd); extern struct file *fget_many(unsigned int fd, unsigned int refs); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); extern unsigned long __fdget(unsigned int fd); extern unsigned long __fdget_raw(unsigned int fd); extern unsigned long __fdget_pos(unsigned int fd); extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { return (struct fd){(struct file *)(v & ~3),v & 3}; } static inline struct fd fdget(unsigned int fd) { return __to_fd(__fdget(fd)); } static inline struct fd fdget_raw(unsigned int fd) { return __to_fd(__fdget_raw(fd)); } static inline struct fd fdget_pos(int fd) { return __to_fd(__fdget_pos(fd)); } static inline void fdput_pos(struct fd f) { if (f.flags & FDPUT_POS_UNLOCK) __f_unlock_pos(f.file); fdput(f); } extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags); static inline int receive_fd_user(struct file *file, int __user *ufd, unsigned int o_flags) { if (ufd == NULL) return -EFAULT; return __receive_fd(-1, file, ufd, o_flags); } static inline int receive_fd(struct file *file, unsigned int o_flags) { return __receive_fd(-1, file, NULL, o_flags); } static inline int receive_fd_replace(int fd, struct file *file, unsigned int o_flags) { return __receive_fd(fd, file, NULL, o_flags); } extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; #endif /* __LINUX_FILE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for inet_sock * * Authors: Many, reorganised here by * Arnaldo Carvalho de Melo <acme@mandriva.com> */ #ifndef _INET_SOCK_H #define _INET_SOCK_H #include <linux/bitops.h> #include <linux/string.h> #include <linux/types.h> #include <linux/jhash.h> #include <linux/netdevice.h> #include <net/flow.h> #include <net/sock.h> #include <net/request_sock.h> #include <net/netns/hash.h> #include <net/tcp_states.h> #include <net/l3mdev.h> /** struct ip_options - IP Options * * @faddr - Saved first hop address * @nexthop - Saved nexthop address in LSRR and SSRR * @is_strictroute - Strict source route * @srr_is_hit - Packet destination addr was our one * @is_changed - IP checksum more not valid * @rr_needaddr - Need to record addr of outgoing dev * @ts_needtime - Need to record timestamp * @ts_needaddr - Need to record addr of outgoing dev */ struct ip_options { __be32 faddr; __be32 nexthop; unsigned char optlen; unsigned char srr; unsigned char rr; unsigned char ts; unsigned char is_strictroute:1, srr_is_hit:1, is_changed:1, rr_needaddr:1, ts_needtime:1, ts_needaddr:1; unsigned char router_alert; unsigned char cipso; unsigned char __pad2; unsigned char __data[]; }; struct ip_options_rcu { struct rcu_head rcu; struct ip_options opt; }; struct ip_options_data { struct ip_options_rcu opt; char data[40]; }; struct inet_request_sock { struct request_sock req; #define ir_loc_addr req.__req_common.skc_rcv_saddr #define ir_rmt_addr req.__req_common.skc_daddr #define ir_num req.__req_common.skc_num #define ir_rmt_port req.__req_common.skc_dport #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr #define ir_iif req.__req_common.skc_bound_dev_if #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, sack_ok : 1, wscale_ok : 1, ecn_ok : 1, acked : 1, no_srccheck: 1, smc_ok : 1; u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; #if IS_ENABLED(CONFIG_IPV6) struct { struct ipv6_txoptions *ipv6_opt; struct sk_buff *pktopts; }; #endif }; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) { return (struct inet_request_sock *)sk; } static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) return skb->mark; return sk->sk_mark; } static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif return sk->sk_bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) { #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); if (!net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif return 0; } static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, int dif, int sdif) { if (!bound_dev_if) return !sdif || l3mdev_accept; return bound_dev_if == dif || bound_dev_if == sdif; } struct inet_cork { unsigned int flags; __be32 addr; struct ip_options *opt; unsigned int fragsize; int length; /* Total length of all frames */ struct dst_entry *dst; u8 tx_flags; __u8 ttl; __s16 tos; char priority; __u16 gso_size; u64 transmit_time; u32 mark; }; struct inet_cork_full { struct inet_cork base; struct flowi fl; }; struct ip_mc_socklist; struct ipv6_pinfo; struct rtable; /** struct inet_sock - representation of INET sockets * * @sk - ancestor class * @pinet6 - pointer to IPv6 control block * @inet_daddr - Foreign IPv4 addr * @inet_rcv_saddr - Bound local IPv4 addr * @inet_dport - Destination port * @inet_num - Local port * @inet_saddr - Sending source * @uc_ttl - Unicast TTL * @inet_sport - Source port * @inet_id - ID counter for DF pkts * @tos - TOS * @mc_ttl - Multicasting TTL * @is_icsk - is this an inet_connection_sock? * @uc_index - Unicast outgoing device index * @mc_index - Multicast device index * @mc_list - Group array * @cork - info to build ip hdr on each ip frag while socket is corked */ struct inet_sock { /* sk and pinet6 has to be the first two members of inet_sock */ struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; #endif /* Socket demultiplex comparisons on incoming packets. */ #define inet_daddr sk.__sk_common.skc_daddr #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_dport sk.__sk_common.skc_dport #define inet_num sk.__sk_common.skc_num __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; __be16 inet_sport; __u16 inet_id; struct ip_options_rcu __rcu *inet_opt; int rx_dst_ifindex; __u8 tos; __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; __u8 recverr:1, is_icsk:1, freebind:1, hdrincl:1, mc_loop:1, transparent:1, mc_all:1, nodefrag:1; __u8 bind_address_no_port:1, recverr_rfc4884:1, defer_connect:1; /* Indicates that fastopen_connect is set * and cookie exists so we defer connect * until first data frame is written */ __u8 rcv_tos; __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ /* cmsg flags for inet */ #define IP_CMSG_PKTINFO BIT(0) #define IP_CMSG_TTL BIT(1) #define IP_CMSG_TOS BIT(2) #define IP_CMSG_RECVOPTS BIT(3) #define IP_CMSG_RETOPTS BIT(4) #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket * * SYNACK messages might be attached to request sockets. * Some places want to reach the listener in this case. */ static inline struct sock *sk_to_full_sk(struct sock *sk) { #ifdef CONFIG_INET if (sk && sk->sk_state == TCP_NEW_SYN_RECV) sk = inet_reqsk(sk)->rsk_listener; #endif return sk; } /* sk_to_full_sk() variant with a const argument */ static inline const struct sock *sk_const_to_full_sk(const struct sock *sk) { #ifdef CONFIG_INET if (sk && sk->sk_state == TCP_NEW_SYN_RECV) sk = ((const struct request_sock *)sk)->rsk_listener; #endif return sk; } static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) { return sk_to_full_sk(skb->sk); } static inline struct inet_sock *inet_sk(const struct sock *sk) { return (struct inet_sock *)sk; } static inline void __inet_sk_copy_descendant(struct sock *sk_to, const struct sock *sk_from, const int ancestor_size) { memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, sk_from->sk_prot->obj_size - ancestor_size); } int inet_sk_rebuild_header(struct sock *sk); /** * inet_sk_state_load - read sk->sk_state for lockless contexts * @sk: socket pointer * * Paired with inet_sk_state_store(). Used in places we don't hold socket lock: * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... */ static inline int inet_sk_state_load(const struct sock *sk) { /* state change might impact lockless readers. */ return smp_load_acquire(&sk->sk_state); } /** * inet_sk_state_store - update sk->sk_state * @sk: socket pointer * @newstate: new state * * Paired with inet_sk_state_load(). Should be used in contexts where * state change might impact lockless readers. */ void inet_sk_state_store(struct sock *sk, int newstate); void inet_sk_set_state(struct sock *sk, int state); static inline unsigned int __inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport, u32 initval) { return jhash_3words((__force __u32) laddr, (__force __u32) faddr, ((__u32) lport) << 16 | (__force __u32)fport, initval); } struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener); static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { __u8 flags = 0; if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; return flags; } static inline void inet_inc_convert_csum(struct sock *sk) { inet_sk(sk)->convert_csum++; } static inline void inet_dec_convert_csum(struct sock *sk) { if (inet_sk(sk)->convert_csum > 0) inet_sk(sk)->convert_csum--; } static inline bool inet_get_convert_csum(struct sock *sk) { return !!inet_sk(sk)->convert_csum; } static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return net->ipv4.sysctl_ip_nonlocal_bind || inet->freebind || inet->transparent; } #endif /* _INET_SOCK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC_H #define _ASM_X86_ATOMIC_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> #include <asm/barrier.h> /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ /** * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ return __READ_ONCE((v)->counter); } /** * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } /** * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test /** * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc /** * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. */ static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec /** * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test /** * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test /** * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative /** * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return /** * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); } #define arch_atomic_sub_return arch_atomic_sub_return static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } #define arch_atomic_fetch_add arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } #define arch_atomic_fetch_sub arch_atomic_fetch_sub static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic_cmpxchg arch_atomic_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic_fetch_and arch_atomic_fetch_and static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic_fetch_or arch_atomic_fetch_or static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif #define ARCH_ATOMIC #endif /* _ASM_X86_ATOMIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 /* SPDX-License-Identifier: GPL-2.0 */ /* * This file provides wrappers with sanitizer instrumentation for non-atomic * bit operations. * * To use this functionality, an arch's bitops.h file needs to define each of * the below bit operations with an arch_ prefix (e.g. arch_set_bit(), * arch___set_bit(), etc.). */ #ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H #define _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H #include <linux/instrumented.h> /** * __set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * Unlike set_bit(), this function is non-atomic. If it is called on the same * region of memory concurrently, the effect may be that only one operation * succeeds. */ static inline void __set_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___set_bit(nr, addr); } /** * __clear_bit - Clears a bit in memory * @nr: the bit to clear * @addr: the address to start counting from * * Unlike clear_bit(), this function is non-atomic. If it is called on the same * region of memory concurrently, the effect may be that only one operation * succeeds. */ static inline void __clear_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit(nr, addr); } /** * __change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * * Unlike change_bit(), this function is non-atomic. If it is called on the same * region of memory concurrently, the effect may be that only one operation * succeeds. */ static inline void __change_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___change_bit(nr, addr); } static inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr) { if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) { /* * We treat non-atomic read-write bitops a little more special. * Given the operations here only modify a single bit, assuming * non-atomicity of the writer is sufficient may be reasonable * for certain usage (and follows the permissible nature of the * assume-plain-writes-atomic rule): * 1. report read-modify-write races -> check read; * 2. do not report races with marked readers, but do report * races with unmarked readers -> check "atomic" write. */ kcsan_check_read(addr + BIT_WORD(nr), sizeof(long)); /* * Use generic write instrumentation, in case other sanitizers * or tools are enabled alongside KCSAN. */ instrument_write(addr + BIT_WORD(nr), sizeof(long)); } else { instrument_read_write(addr + BIT_WORD(nr), sizeof(long)); } } /** * __test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_set_bit(nr, addr); } /** * __test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_clear_bit(nr, addr); } /** * __test_and_change_bit - Change a bit and return its old value * @nr: Bit to change * @addr: Address to count from * * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_change_bit(nr, addr); } /** * test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static inline bool test_bit(long nr, const volatile unsigned long *addr) { instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); return arch_test_bit(nr, addr); } #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM block #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BLOCK_H #include <linux/blktrace_api.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/tracepoint.h> #define RWBS_LEN 8 DECLARE_EVENT_CLASS(block_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh), TP_STRUCT__entry ( __field( dev_t, dev ) __field( sector_t, sector ) __field( size_t, size ) ), TP_fast_assign( __entry->dev = bh->b_bdev->bd_dev; __entry->sector = bh->b_blocknr; __entry->size = bh->b_size; ), TP_printk("%d,%d sector=%llu size=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->sector, __entry->size ) ); /** * block_touch_buffer - mark a buffer accessed * @bh: buffer_head being touched * * Called from touch_buffer(). */ DEFINE_EVENT(block_buffer, block_touch_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_dirty_buffer - mark a buffer dirty * @bh: buffer_head being dirtied * * Called from mark_buffer_dirty(). */ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_rq_requeue - place block IO request back on a queue * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue * @q. For some reason the request was not completed and needs to be * put back in the queue. */ TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, 0) ); /** * block_rq_complete - block IO operation completed by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If * the @rq->bio is %NULL, then there is absolutely no additional work to * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ TRACE_EVENT(block_rq_complete, TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = error; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_rq_insert - insert block operation request into queue * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted * into queue @q. The fields in the operation request @rq struct can * be examined to determine which device and sectors the pending * operation would access. */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a * device driver for processing. */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_rq_merge - merge request with another one in the elevator * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another * request queued in the elevator. */ DEFINE_EVENT(block_rq, block_rq_merge, TP_PROTO(struct request_queue *q, struct request *rq), TP_ARGS(q, rq) ); /** * block_bio_bounce - used bounce buffer when processing block operation * @q: queue holding the block operation * @bio: block operation * * A bounce buffer was used to handle the block operation @bio in @q. * This occurs when hardware limitations prevent a direct transfer of * data between the @bio data memory area and the IO device. Use of a * bounce buffer requires extra copying of data and decreases * performance. */ TRACE_EVENT(block_bio_bounce, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. */ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_bio_merge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ DEFINE_EVENT(block_bio_merge, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), TP_ARGS(q, rq, bio) ); /** * block_bio_queue - putting new block IO operation in queue * @q: queue holding operation * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio ? bio_dev(bio) : 0; __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_opf : 0, __entry->nr_sector); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_getrq - get a free request entry in queue for block IO operations * @q: queue for operations * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * A request struct for queue @q has been allocated to handle the * block IO operation @bio. */ DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw) ); /** * block_sleeprq - waiting to get a free request entry in queue for block IO operation * @q: queue for operation * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * In the case where a request struct cannot be provided for queue @q * the process needs to wait for an request struct to become * available. This tracepoint event is generated each time the * process goes to sleep waiting for request struct become available. */ DEFINE_EVENT(block_get_rq, block_sleeprq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_ARGS(q, bio, rw) ); /** * block_plug - keep operations requests in request queue * @q: request queue to plug * * Plug the request queue @q. Do not allow block operation requests * to be sent to the device driver. Instead, accumulate requests in * the queue to improve throughput performance of the block device. */ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), TP_ARGS(q), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s]", __entry->comm) ); DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); /** * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit) ); /** * block_split - split a single bio struct into two bio structs * @q: queue containing the bio * @bio: block operation being split * @new_sector: The starting sector for the new bio * * The bio request @bio in request queue @q needs to be split into two * bio requests. The newly created @bio request starts at * @new_sector. This split may be required due to hardware limitation * such as operation crossing device boundaries in a RAID system. */ TRACE_EVENT(block_split, TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int new_sector), TP_ARGS(q, bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, (unsigned long long)__entry->new_sector, __entry->comm) ); /** * block_bio_remap - map request for a logical device to the raw device * @q: queue holding the operation * @bio: revised operation * @dev: device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the * raw block device. */ TRACE_EVENT(block_bio_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, sector_t from), TP_ARGS(q, bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector) ); /** * block_rq_remap - map request for a block operation request * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation * * The block operation request @rq in @q has been remapped. The block * operation request @rq holds the current information and @from hold * the original sector. */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, sector_t from), TP_ARGS(q, rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = disk_devt(rq->rq_disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector, __entry->nr_bios) ); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_COREDUMP_H #define _LINUX_SCHED_COREDUMP_H #include <linux/mm_types.h> #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ /* mm flags */ /* for SUID_DUMP_* above */ #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things * that are using this for checking for privilege transitions, it must * test against SUID_DUMP_USER rather than treating it as a boolean * value. */ static inline int __get_dumpable(unsigned long mm_flags) { return mm_flags & MMF_DUMPABLE_MASK; } static inline int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); } /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 #define MMF_DUMP_MAPPED_PRIVATE 4 #define MMF_DUMP_MAPPED_SHARED 5 #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 #define MMF_DUMP_DAX_PRIVATE 9 #define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) #else # define MMF_DUMP_MASK_DEFAULT_ELF 0 #endif /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ /* * This one-shot flag is dropped due to necessity of changing exe once again * on NFS restore */ //#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 27 /* mm is shared between processes */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _DELAYED_CALL_H #define _DELAYED_CALL_H /* * Poor man's closures; I wish we could've done them sanely polymorphic, * but... */ struct delayed_call { void (*fn)(void *); void *arg; }; #define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL} /* I really wish we had closures with sane typechecking... */ static inline void set_delayed_call(struct delayed_call *call, void (*fn)(void *), void *arg) { call->fn = fn; call->arg = arg; } static inline void do_delayed_call(struct delayed_call *call) { if (call->fn) call->fn(call->arg); } static inline void clear_delayed_call(struct delayed_call *call) { call->fn = NULL; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> */ #ifndef _EXT4_EXTENTS #define _EXT4_EXTENTS #include "ext4.h" /* * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks * becomes very small, so index split, in-depth growing and * other hard changes happen much more often. * This is for debug purposes only. */ #define AGGRESSIVE_TEST_ /* * With EXTENTS_STATS defined, the number of blocks and extents * are collected in the truncate path. They'll be shown at * umount time. */ #define EXTENTS_STATS__ /* * If CHECK_BINSEARCH is defined, then the results of the binary search * will also be checked by linear search. */ #define CHECK_BINSEARCH__ /* * If EXT_STATS is defined then stats numbers are collected. * These number will be displayed at umount time. */ #define EXT_STATS_ /* * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. * For non-inode extent blocks, ext4_extent_tail * follows the array. */ /* * This is the extent tail on-disk structure. * All other extent structures are 12 bytes long. It turns out that * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which * covers all valid ext4 block sizes. Therefore, this tail structure can be * crammed into the end of the block without having to rebalance the tree. */ struct ext4_extent_tail { __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ }; /* * This is the extent on-disk structure. * It's used at the bottom of the tree. */ struct ext4_extent { __le32 ee_block; /* first logical block extent covers */ __le16 ee_len; /* number of blocks covered by extent */ __le16 ee_start_hi; /* high 16 bits of physical block */ __le32 ee_start_lo; /* low 32 bits of physical block */ }; /* * This is index on-disk structure. * It's used at all the levels except the bottom. */ struct ext4_extent_idx { __le32 ei_block; /* index covers logical blocks from 'block' */ __le32 ei_leaf_lo; /* pointer to the physical block of the next * * level. leaf or next index could be there */ __le16 ei_leaf_hi; /* high 16 bits of physical block */ __u16 ei_unused; }; /* * Each block (leaves and indexes), even inode-stored has header. */ struct ext4_extent_header { __le16 eh_magic; /* probably will support different formats */ __le16 eh_entries; /* number of valid entries */ __le16 eh_max; /* capacity of store in entries */ __le16 eh_depth; /* has tree real underlying blocks? */ __le32 eh_generation; /* generation of the tree */ }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) #define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) static inline struct ext4_extent_tail * find_ext4_extent_tail(struct ext4_extent_header *eh) { return (struct ext4_extent_tail *)(((void *)eh) + EXT4_EXTENT_TAIL_OFFSET(eh)); } /* * Array of ext4_ext_path contains path to some extent. * Creation/lookup routines use it for traversal/splitting/etc. * Truncate uses it to simulate recursive walking. */ struct ext4_ext_path { ext4_fsblk_t p_block; __u16 p_depth; __u16 p_maxdepth; struct ext4_extent *p_ext; struct ext4_extent_idx *p_idx; struct ext4_extent_header *p_hdr; struct buffer_head *p_bh; }; /* * Used to record a portion of a cluster found at the beginning or end * of an extent while traversing the extent tree during space removal. * A partial cluster may be removed if it does not contain blocks shared * with extents that aren't being deleted (tofree state). Otherwise, * it cannot be removed (nofree state). */ struct partial_cluster { ext4_fsblk_t pclu; /* physical cluster number */ ext4_lblk_t lblk; /* logical block number within logical cluster */ enum {initial, tofree, nofree} state; }; /* * structure for external API */ /* * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an * initialized extent. This is 2^15 and not (2^16 - 1), since we use the * MSB of ee_len field in the extent datastructure to signify if this * particular extent is an initialized extent or an unwritten (i.e. * preallocated). * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an * unwritten extent. * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an * unwritten one. In other words, if MSB of ee_len is set, it is an * unwritten extent with only one special scenario when ee_len = 0x8000. * In this case we can not have an unwritten extent of zero length and * thus we make it as a special case of initialized extent with 0x8000 length. * This way we get better extent-to-group alignment for initialized extents. * Hence, the maximum number of blocks we can have in an *initialized* * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767). */ #define EXT_INIT_MAX_LEN (1UL << 15) #define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ ((struct ext4_extent *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_FIRST_INDEX(__hdr__) \ ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_HAS_FREE_INDEX(__path__) \ (le16_to_cpu((__path__)->p_hdr->eh_entries) \ < le16_to_cpu((__path__)->p_hdr->eh_max)) #define EXT_LAST_EXTENT(__hdr__) \ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_LAST_INDEX(__hdr__) \ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_MAX_EXTENT(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \ : 0) #define EXT_MAX_INDEX(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0) static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) { return (struct ext4_extent_header *) EXT4_I(inode)->i_data; } static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) { return (struct ext4_extent_header *) bh->b_data; } static inline unsigned short ext_depth(struct inode *inode) { return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); } static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext) { /* We can not have an unwritten extent of zero length! */ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_unwritten(struct ext4_extent *ext) { /* Extent with ee_len of 0x8000 is treated as an initialized extent */ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? le16_to_cpu(ext->ee_len) : (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) { ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } /* * ext4_ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; block = le32_to_cpu(ex->ee_start_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; return block; } /* * ext4_idx_pblock: * combine low and high parts of a leaf physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; block = le32_to_cpu(ix->ei_leaf_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; return block; } /* * ext4_ext_store_pblock: * stores a large physical block number into an extent struct, * breaking it into parts */ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } /* * ext4_idx_store_pblock: * stores a large physical block number into an index struct, * breaking it into parts */ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } #endif /* _EXT4_EXTENTS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 #ifndef INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H #include <linux/io_uring.h> struct io_wq; enum { IO_WQ_WORK_CANCEL = 1, IO_WQ_WORK_HASHED = 2, IO_WQ_WORK_UNBOUND = 4, IO_WQ_WORK_NO_CANCEL = 8, IO_WQ_WORK_CONCURRENT = 16, IO_WQ_WORK_FILES = 32, IO_WQ_WORK_FS = 64, IO_WQ_WORK_MM = 128, IO_WQ_WORK_CREDS = 256, IO_WQ_WORK_BLKCG = 512, IO_WQ_WORK_FSIZE = 1024, IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ }; enum io_wq_cancel { IO_WQ_CANCEL_OK, /* cancelled before started */ IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */ IO_WQ_CANCEL_NOTFOUND, /* work not found */ }; struct io_wq_work_node { struct io_wq_work_node *next; }; struct io_wq_work_list { struct io_wq_work_node *first; struct io_wq_work_node *last; }; static inline void wq_list_add_after(struct io_wq_work_node *node, struct io_wq_work_node *pos, struct io_wq_work_list *list) { struct io_wq_work_node *next = pos->next; pos->next = node; node->next = next; if (!next) list->last = node; } static inline void wq_list_add_tail(struct io_wq_work_node *node, struct io_wq_work_list *list) { if (!list->first) { list->last = node; WRITE_ONCE(list->first, node); } else { list->last->next = node; list->last = node; } node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, struct io_wq_work_node *last, struct io_wq_work_node *prev) { /* first in the list, if prev==NULL */ if (!prev) WRITE_ONCE(list->first, last->next); else prev->next = last->next; if (last == list->last) list->last = prev; last->next = NULL; } static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work_node *node, struct io_wq_work_node *prev) { wq_list_cut(list, node, prev); } #define wq_list_for_each(pos, prv, head) \ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) #define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) #define INIT_WQ_LIST(list) do { \ (list)->first = NULL; \ (list)->last = NULL; \ } while (0) struct io_wq_work { struct io_wq_work_node list; struct io_identity *identity; unsigned flags; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) { if (!work->list.next) return NULL; return container_of(work->list.next, struct io_wq_work, list); } typedef void (free_work_fn)(struct io_wq_work *); typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *); struct io_wq_data { struct user_struct *user; io_wq_work_fn *do_work; free_work_fn *free_work; }; struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); bool io_wq_get(struct io_wq *wq, struct io_wq_data *data); void io_wq_destroy(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_hash_work(struct io_wq_work *work, void *val); static inline bool io_wq_is_hashed(struct io_wq_work *work) { return work->flags & IO_WQ_WORK_HASHED; } void io_wq_cancel_all(struct io_wq *wq); typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, void *data, bool cancel_all); struct task_struct *io_wq_get_task(struct io_wq *wq); #if defined(CONFIG_IO_WQ) extern void io_wq_worker_sleeping(struct task_struct *); extern void io_wq_worker_running(struct task_struct *); #else static inline void io_wq_worker_sleeping(struct task_struct *tsk) { } static inline void io_wq_worker_running(struct task_struct *tsk) { } #endif static inline bool io_wq_current_is_worker(void) { return in_task() && (current->flags & PF_IO_WORKER); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * 25-Jul-1998 Major changes to allow for ip chain table * * 3-Jan-2000 Named tables to allow packet selection for different uses. */ /* * Format of an IP6 firewall descriptor * * src, dst, src_mask, dst_mask are always stored in network byte order. * flags are stored in host byte order (of course). * Port numbers are stored in HOST byte order. */ #ifndef _UAPI_IP6_TABLES_H #define _UAPI_IP6_TABLES_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/if.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter/x_tables.h> #ifndef __KERNEL__ #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define ip6t_match xt_match #define ip6t_target xt_target #define ip6t_table xt_table #define ip6t_get_revision xt_get_revision #define ip6t_entry_match xt_entry_match #define ip6t_entry_target xt_entry_target #define ip6t_standard_target xt_standard_target #define ip6t_error_target xt_error_target #define ip6t_counters xt_counters #define IP6T_CONTINUE XT_CONTINUE #define IP6T_RETURN XT_RETURN /* Pre-iptables-1.4.0 */ #include <linux/netfilter/xt_tcpudp.h> #define ip6t_tcp xt_tcp #define ip6t_udp xt_udp #define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT #define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT #define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS #define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION #define IP6T_TCP_INV_MASK XT_TCP_INV_MASK #define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT #define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK #define ip6t_counters_info xt_counters_info #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET #define IP6T_ERROR_TARGET XT_ERROR_TARGET #define IP6T_MATCH_ITERATE(e, fn, args...) \ XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) #endif /* Yes, Virginia, you have to zero the padding. */ struct ip6t_ip6 { /* Source and destination IP6 addr */ struct in6_addr src, dst; /* Mask for src and dest IP6 addr */ struct in6_addr smsk, dmsk; char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; /* Upper protocol number * - The allowed value is 0 (any) or protocol number of last parsable * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or * the non IPv6 extension headers. * - The protocol numbers of IPv6 extension headers except of ESP and * MH do not match any packets. * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol. */ __u16 proto; /* TOS to match iff flags & IP6T_F_TOS */ __u8 tos; /* Flags word */ __u8 flags; /* Inverse flags */ __u8 invflags; }; /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ #define IP6T_F_TOS 0x02 /* Match the TOS. */ #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ /* Values for "inv" field in struct ip6t_ip6. */ #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ #define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ #define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ #define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ #define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ #define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ #define IP6T_INV_PROTO XT_INV_PROTO #define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ /* This structure defines each of the firewall rules. Consists of 3 parts which are 1) general IP header stuff 2) match specific stuff 3) the target to perform if the rule matches */ struct ip6t_entry { struct ip6t_ip6 ipv6; /* Mark with fields that we care about. */ unsigned int nfcache; /* Size of ipt_entry + matches */ __u16 target_offset; /* Size of ipt_entry + matches + target */ __u16 next_offset; /* Back pointer */ unsigned int comefrom; /* Packet and byte counters. */ struct xt_counters counters; /* The matches (if any), then the target. */ unsigned char elems[0]; }; /* Standard entry */ struct ip6t_standard { struct ip6t_entry entry; struct xt_standard_target target; }; struct ip6t_error { struct ip6t_entry entry; struct xt_error_target target; }; #define IP6T_ENTRY_INIT(__size) \ { \ .target_offset = sizeof(struct ip6t_entry), \ .next_offset = (__size), \ } #define IP6T_STANDARD_INIT(__verdict) \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } #define IP6T_ERROR_INIT \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use * a raw socket for this. Instead we check rights in the calls. * * ATTENTION: check linux/in6.h before adding new number here. */ #define IP6T_BASE_CTL 64 #define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) #define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) #define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS #define IP6T_SO_GET_INFO (IP6T_BASE_CTL) #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) #define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET /* obtain original address if REDIRECT'd connection */ #define IP6T_SO_ORIGINAL_DST 80 /* ICMP matching stuff */ struct ip6t_icmp { __u8 type; /* type to match */ __u8 code[2]; /* range of code */ __u8 invflags; /* Inverse flags */ }; /* Values for "inv" field for struct ipt_icmp. */ #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ /* The argument to IP6T_SO_GET_INFO */ struct ip6t_getinfo { /* Which table: caller fills this in. */ char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ unsigned int valid_hooks; /* Hook entry points: one per netfilter hook. */ unsigned int hook_entry[NF_INET_NUMHOOKS]; /* Underflow points. */ unsigned int underflow[NF_INET_NUMHOOKS]; /* Number of entries */ unsigned int num_entries; /* Size of entries. */ unsigned int size; }; /* The argument to IP6T_SO_SET_REPLACE. */ struct ip6t_replace { /* Which table. */ char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ unsigned int valid_hooks; /* Number of entries */ unsigned int num_entries; /* Total size of new entries */ unsigned int size; /* Hook entry points. */ unsigned int hook_entry[NF_INET_NUMHOOKS]; /* Underflow points. */ unsigned int underflow[NF_INET_NUMHOOKS]; /* Information about old entries: */ /* Number of counters (must be equal to current number of entries). */ unsigned int num_counters; /* The old entries' counters. */ struct xt_counters __user *counters; /* The entries (hang off end: not really an array). */ struct ip6t_entry entries[0]; }; /* The argument to IP6T_SO_GET_ENTRIES. */ struct ip6t_get_entries { /* Which table: user fills this in. */ char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; /* The entries. */ struct ip6t_entry entrytable[0]; }; /* Helper functions */ static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) { return (struct xt_entry_target *)((char *)e + e->target_offset); } /* * Main firewall chains definitions and global var's definitions. */ #endif /* _UAPI_IP6_TABLES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 // SPDX-License-Identifier: GPL-2.0 #ifndef _LINUX_KERNEL_TRACE_H #define _LINUX_KERNEL_TRACE_H #include <linux/fs.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/trace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/glob.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/ctype.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ #include <asm/syscall.h> /* some archs define it here */ #endif enum trace_type { __TRACE_FIRST_TYPE = 0, TRACE_FN, TRACE_CTX, TRACE_WAKE, TRACE_STACK, TRACE_PRINT, TRACE_BPRINT, TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; #undef __field #define __field(type, item) type item; #undef __field_fn #define __field_fn(type, item) type item; #undef __field_struct #define __field_struct(type, item) __field(type, item) #undef __field_desc #define __field_desc(type, container, item) #undef __field_packed #define __field_packed(type, container, item) #undef __array #define __array(type, item, size) type item[size]; #undef __array_desc #define __array_desc(type, container, item, size) #undef __dynamic_array #define __dynamic_array(type, item) type item[]; #undef F_STRUCT #define F_STRUCT(args...) args #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" /* Use this for memory failure errors */ #define MEM_FAIL(condition, fmt, ...) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ __warned = true; \ pr_err("ERROR: " fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_warn_once); \ }) /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h */ struct syscall_trace_enter { struct trace_entry ent; int nr; unsigned long args[]; }; struct syscall_trace_exit { struct trace_entry ent; int nr; long ret; }; struct kprobe_trace_entry_head { struct trace_entry ent; unsigned long ip; }; struct kretprobe_trace_entry_head { struct trace_entry ent; unsigned long func; unsigned long ret_ip; }; /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_IRQS_NOSUPPORT = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, }; #define TRACE_BUF_SIZE 1024 struct trace_array; /* * The CPU trace array - it consists of thousands of trace entries * plus some other descriptor data: (for example which task started * the trace, etc.) */ struct trace_array_cpu { atomic_t disabled; void *buffer_page; /* ring buffer spare */ unsigned long entries; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; unsigned long critical_sequence; unsigned long nice; unsigned long policy; unsigned long rt_priority; unsigned long skipped_entries; u64 preempt_timestamp; pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; #ifdef CONFIG_FUNCTION_TRACER int ftrace_ignore_pid; #endif bool ignore_pid; }; struct tracer; struct trace_option_dentry; struct array_buffer { struct trace_array *tr; struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; }; #define TRACE_FLAGS_MAX_SIZE 32 struct trace_options { struct tracer *tracer; struct trace_option_dentry *topts; }; struct trace_pid_list { int pid_max; unsigned long *pids; }; enum { TRACE_PIDS = BIT(0), TRACE_NO_PIDS = BIT(1), }; static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* Return true if the pid list in type has pids */ return ((type & TRACE_PIDS) && pid_list) || ((type & TRACE_NO_PIDS) && no_pid_list); } static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* * Turning off what is in @type, return true if the "other" * pid list, still has pids in it. */ return (!(type & TRACE_PIDS) && pid_list) || (!(type & TRACE_NO_PIDS) && no_pid_list); } typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); /** * struct cond_snapshot - conditional snapshot data and callback * * The cond_snapshot structure encapsulates a callback function and * data associated with the snapshot for a given tracing instance. * * When a snapshot is taken conditionally, by invoking * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is * passed in turn to the cond_snapshot.update() function. That data * can be compared by the update() implementation with the cond_data * contained within the struct cond_snapshot instance associated with * the trace_array. Because the tr->max_lock is held throughout the * update() call, the update() function can directly retrieve the * cond_snapshot and cond_data associated with the per-instance * snapshot associated with the trace_array. * * The cond_snapshot.update() implementation can save data to be * associated with the snapshot if it decides to, and returns 'true' * in that case, or it returns 'false' if the conditional snapshot * shouldn't be taken. * * The cond_snapshot instance is created and associated with the * user-defined cond_data by tracing_cond_snapshot_enable(). * Likewise, the cond_snapshot instance is destroyed and is no longer * associated with the trace instance by * tracing_cond_snapshot_disable(). * * The method below is required. * * @update: When a conditional snapshot is invoked, the update() * callback function is invoked with the tr->max_lock held. The * update() implementation signals whether or not to actually * take the snapshot, by returning 'true' if so, 'false' if no * snapshot should be taken. Because the max_lock is held for * the duration of update(), the implementation is safe to * directly retrieved and save any implementation data it needs * to in association with the snapshot. */ struct cond_snapshot { void *cond_data; cond_update_fn_t update; }; /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. * They have on/off state as well: */ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped * with the buffer of the array_buffer and the buffers are reset for * the array_buffer so the tracing can continue. */ struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; #endif #endif struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; /* * max_lock is used to protect the swapping of buffers * when taking a max snapshot. The buffers themselves are * protected by per_cpu spinlocks. But the action of the swap * needs its own lock. * * This is defined as a arch_spinlock_t in order to help * with performance when lockdep debugging is enabled. * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; int nr_topts; bool clear_trace; int buffer_percent; unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; int trace_ref; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; struct list_head mod_trace; struct list_head mod_notrace; #endif /* function tracing enabled */ int function_enabled; #endif int time_stamp_abs_ref; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif }; enum { TRACE_ARRAY_FL_GLOBAL = (1 << 0) }; extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. */ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; if (list_empty(&ftrace_trace_arrays)) return NULL; tr = list_entry(ftrace_trace_arrays.prev, typeof(*tr), list); WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); return tr; } #define FTRACE_CMP_TYPE(var, type) \ __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN #define IF_ASSIGN(var, entry, etype, id) \ if (FTRACE_CMP_TYPE(var, etype)) { \ var = (typeof(var))(entry); \ WARN_ON(id != 0 && (entry)->type != id); \ break; \ } /* Will cause compile errors if type is not found. */ extern void __ftrace_bad_type(void); /* * The trace_assign_type is a verifier that the entry type is * the same as the type being assigned. To add new types simply * add a line with the following format: * * IF_ASSIGN(var, ent, type, id); * * Where "type" is the trace type that includes the trace_entry * as the "ent" item. And "id" is the trace identifier that is * used in the trace_type enum. * * If the type can have more than one id, then use zero. */ #define trace_assign_type(var, ent) \ do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ __ftrace_bad_type(); \ } while (0) /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the * flags value in struct tracer_flags. */ struct tracer_opt { const char *name; /* Will appear on the trace_options file */ u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* * The set of specific options for a tracer. Your tracer * have to set the initial value of the flags val. */ struct tracer_flags { u32 val; struct tracer_opt *opts; struct tracer *trace; }; /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b struct trace_option_dentry { struct tracer_opt *opt; struct tracer_flags *flags; struct trace_array *tr; struct dentry *entry; }; /** * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer * @start: called when tracing is unpaused (echo 1 > tracing_on) * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened * @close: called when the trace file is released * @pipe_close: called when the trace_pipe file is released * @read: override the default read callback on trace_pipe * @splice_read: override the default splice_read callback on trace_pipe * @selftest: selftest to run on boot (see trace_selftest.c) * @print_headers: override the first lines that describe your columns * @print_line: callback that prints a trace * @set_flag: signals one of your private flags changed (trace_options file) * @flags: your private flags */ struct tracer { const char *name; int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); int (*update_thresh)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t (*splice_read)(struct trace_iterator *iter, struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(struct trace_array *tr, u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; int enabled; bool print_max; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif /* True if tracer cannot be enabled in kernel param */ bool noboot; }; /* Only current can touch trace_recursion */ /* * For function tracing recursion: * The order of these bits are important. * * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... * If callback is registered to the "global" list, the list * function is called and recursion checks the GLOBAL bits. * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. */ enum { /* Function recursion bits */ TRACE_FTRACE_BIT, TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, TRACE_FTRACE_TRANSITION_BIT, /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function * graph in irq because we want to trace a particular function that * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ TRACE_IRQ_BIT, /* Set if the function is in the set_graph_function file */ TRACE_GRAPH_BIT, /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace * the function in that interrupt, the depth can be greater * than zero, because of the preempted start of a previous * trace. In an even more unlikely case, depth could be 2 * if a softirq interrupted the start of graph tracing, * followed by an interrupt preempting a start of graph * tracing in the softirq, and depth can even be 3 * if an NMI came in at the start of an interrupt function * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask * of what the depth is when we set TRACE_GRAPH_BIT */ TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) #define trace_recursion_depth() \ (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) #define trace_recursion_set_depth(depth) \ do { \ current->trace_recursion &= \ ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ current->trace_recursion |= \ ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ } while (0) #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) enum { TRACE_CTX_NMI, TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) { int bit; if (in_interrupt()) { if (in_nmi()) bit = TRACE_CTX_NMI; else if (in_irq()) bit = TRACE_CTX_IRQ; else bit = TRACE_CTX_SOFTIRQ; } else bit = TRACE_CTX_NORMAL; return bit; } static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); return bit; } val |= 1 << bit; current->trace_recursion = val; barrier(); return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; bit = 1 << bit; val &= ~bit; barrier(); current->trace_recursion = val; } static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL; } int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); int tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); void tracing_start_tgid_record(void); void tracing_stop_tgid_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); loff_t tracing_lseek(struct file *file, loff_t offset, int whence); extern cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; /* PID filtering */ extern int pid_max; bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task); void trace_filter_add_remove_task(struct trace_pid_list *pid_list, struct task_struct *self, struct task_struct *task); void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); #ifdef CONFIG_TRACER_MAX_TRACE void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); #endif /* CONFIG_TRACER_MAX_TRACE */ #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ defined(CONFIG_FSNOTIFY) void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else static inline void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { } #endif /* CONFIG_STACKTRACE */ extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } #endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST extern void __init disable_tracing_selftest(const char *reason); extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. Thus, when selftests * are enabled, then the tracers need to reference __init functions. */ #define __tracer_data __refdata #else static inline void __init disable_tracing_selftest(const char *reason) { } /* Tracers are seldom changed. Optimize when selftests are disabled. */ #define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); struct ftrace_hash; struct ftrace_mod_load { struct list_head list; char *func; char *module; int enable; }; enum { FTRACE_HASH_FL_MOD = (1 << 0), }; struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; unsigned long count; unsigned long flags; struct rcu_head rcu; }; struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) { return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD)); } /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Flag options */ #define TRACE_GRAPH_PRINT_OVERRUN 0x1 #define TRACE_GRAPH_PRINT_CPU 0x2 #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_REL_TIME 0x40 #define TRACE_GRAPH_PRINT_IRQS 0x80 #define TRACE_GRAPH_PRINT_TAIL 0x100 #define TRACE_GRAPH_SLEEP_TIME 0x200 #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) extern void ftrace_graph_sleep_time_control(bool enable); #ifdef CONFIG_FUNCTION_PROFILER extern void ftrace_graph_graph_time_control(bool enable); #else static inline void ftrace_graph_graph_time_control(bool enable) { } #endif extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; struct ftrace_hash *hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { ret = 1; goto out; } if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions * when the depth is zero. */ trace_recursion_set(TRACE_GRAPH_BIT); trace_recursion_set_depth(trace->depth); /* * If no irqs are to be traced, but a set_graph_function * is set, and called by an interrupt handler, we still * want to trace it. */ if (in_irq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); ret = 1; } out: preempt_enable_notrace(); return ret; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { if (trace_recursion_test(TRACE_GRAPH_BIT) && trace->depth == trace_recursion_depth()) trace_recursion_clear(TRACE_GRAPH_BIT); } static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; struct ftrace_hash *notrace_hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); return ret; } #else static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { return 1; } static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) { /* trace it when it is-nested-in or is a function enabled. */ return !(trace_recursion_test(TRACE_GRAPH_BIT) || ftrace_graph_addr(trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER #define FTRACE_PID_IGNORE -1 #define FTRACE_PID_TRACE -2 struct ftrace_func_command { struct list_head list; char *name; int (*func)(struct trace_array *tr, struct ftrace_hash *hash, char *func, char *cmd, char *params, int enable); }; extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) != FTRACE_PID_IGNORE; } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); int ftrace_allocate_ftrace_ops(struct trace_array *tr); void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d_tracer); void ftrace_clear_pids(struct trace_array *tr); int init_function_trace(void); void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else static inline int ftrace_trace_task(struct trace_array *tr) { return 1; } static inline int ftrace_is_dead(void) { return 0; } static inline int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { return 0; } static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr) { return 0; } static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } static inline __init void ftrace_init_global_array_ops(struct trace_array *tr) { } static inline void ftrace_reset_array_ops(struct trace_array *tr) { } static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_clear_pids(struct trace_array *tr) { } static inline int init_function_trace(void) { return 0; } static inline void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) { } /* ftace_func_t type is not defined, use macro instead of static inline */ #define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); int (*init)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data); void (*free)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data); int (*print)(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data); }; struct ftrace_func_mapper; typedef int (*ftrace_mapper_func)(void *data); struct ftrace_func_mapper *allocate_ftrace_func_mapper(void); void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, unsigned long ip); int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data); void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, unsigned long ip); void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ftrace_mapper_func free_func); extern int register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); extern int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops); extern void clear_ftrace_function_probes(struct trace_array *tr); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); #else struct ftrace_func_command; static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; } static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } static inline void clear_ftrace_function_probes(struct trace_array *tr) { } /* * The ops parameter passed in is usually undefined. * This must be a macro. */ #define ftrace_create_filter_files(ops, parent) do { } while (0) #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ bool ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found * @buffer: holds the parsed user input * @idx: user input length * @size: buffer size */ struct trace_parser { bool cont; char *buffer; unsigned idx; unsigned size; }; static inline bool trace_parser_loaded(struct trace_parser *parser) { return (parser->idx != 0); } static inline bool trace_parser_cont(struct trace_parser *parser) { return parser->cont; } static inline void trace_parser_clear(struct trace_parser *parser) { parser->cont = false; parser->idx = 0; } extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos); /* * Only create function graph options if function graph is configured. */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER # define FGRAPH_FLAGS \ C(DISPLAY_GRAPH, "display-graph"), #else # define FGRAPH_FLAGS #endif #ifdef CONFIG_BRANCH_TRACER # define BRANCH_FLAGS \ C(BRANCH, "branch"), #else # define BRANCH_FLAGS #endif #ifdef CONFIG_FUNCTION_TRACER # define FUNCTION_FLAGS \ C(FUNCTION, "function-trace"), \ C(FUNC_FORK, "function-fork"), # define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL # define TRACE_ITER_FUNC_FORK 0UL #endif #ifdef CONFIG_STACKTRACE # define STACK_FLAGS \ C(STACKTRACE, "stacktrace"), #else # define STACK_FLAGS #endif /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * * NOTE: These bits must match the trace_options array in * trace.c (this macro guarantees it). */ #define TRACE_FLAGS \ C(PRINT_PARENT, "print-parent"), \ C(SYM_OFFSET, "sym-offset"), \ C(SYM_ADDR, "sym-addr"), \ C(VERBOSE, "verbose"), \ C(RAW, "raw"), \ C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ C(SYM_USEROBJ, "sym-userobj"), \ C(PRINTK_MSGONLY, "printk-msg-only"), \ C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ C(LATENCY_FMT, "latency-format"), \ C(RECORD_CMD, "record-cmd"), \ C(RECORD_TGID, "record-tgid"), \ C(OVERWRITE, "overwrite"), \ C(STOP_ON_FREE, "disable_on_free"), \ C(IRQ_INFO, "irq-info"), \ C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ BRANCH_FLAGS /* * By defining C, we can make TRACE_FLAGS a list of bit names * that will define the bits for the flag masks. */ #undef C #define C(a, b) TRACE_ITER_##a##_BIT enum trace_iterator_bits { TRACE_FLAGS /* Make sure we don't go more than we have bits for */ TRACE_ITER_LAST_BIT }; /* * By redefining C, we can make TRACE_FLAGS a list of masks that * use the bits as defined above. */ #undef C #define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) enum trace_iterator_flags { TRACE_FLAGS }; /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) extern struct tracer nop_trace; #ifdef CONFIG_BRANCH_TRACER extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { if (tr->trace_flags & TRACE_ITER_BRANCH) return enable_branch_tracing(tr); return 0; } static inline void trace_branch_disable(void) { /* due to races, always disable */ disable_branch_tracing(); } #else static inline int trace_branch_enable(struct trace_array *tr) { return 0; } static inline void trace_branch_disable(void) { } #endif /* CONFIG_BRANCH_TRACER */ /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); struct ftrace_event_field { struct list_head link; const char *name; const char *type; int filter_type; int offset; int size; int is_signed; }; struct prog_entry; struct event_filter { struct prog_entry __rcu *prog; char *filter_string; }; struct event_subsystem { struct list_head list; const char *name; struct event_filter *filter; int ref_count; }; struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; struct dentry *entry; int ref_count; int nr_events; }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { /* Simply release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); return; } ring_buffer_discard_commit(buffer, event); } /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires * filtering against its fields, then they will be called as the * entry already holds the field information of the current event. * * It also checks if the event should be discarded or not. * It is to be discarded if the event is soft disabled and the * event was only recorded to process triggers, or if the event * filter is active and this event did not match the filters. * * Returns true if the event is discarded, false otherwise. */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) { unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED | EVENT_FILE_FL_PID_FILTER)))) return false; if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) goto discard; if (file->flags & EVENT_FILE_FL_FILTERED && !filter_match_preds(file->filter, entry)) goto discard; if ((file->flags & EVENT_FILE_FL_PID_FILTER) && trace_event_ignore_this_pid(file)) goto discard; return false; discard: __trace_event_discard_commit(buffer, event); return true; } /** * event_trigger_unlock_commit - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); } /** * event_trigger_unlock_commit_regs - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. * * Same as event_trigger_unlock_commit() but calls * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) event_triggers_post_call(file, tt); } #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) /* * The max preds is the size of unsigned short with * two flags at the MSBs. One bit is used for both the IS_RIGHT * and FOLD flags. The other is reserved. * * 2^14 preds is way more than enough. */ #define MAX_FILTER_PRED 16384 struct filter_pred; struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); typedef int (*regex_match_func)(char *str, struct regex *r, int len); enum regex_type { MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, MATCH_GLOB, MATCH_INDEX, }; struct regex { char pattern[MAX_FILTER_STR_VAL]; int len; int field_len; regex_match_func match; }; struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; unsigned short *ops; struct ftrace_event_field *field; int offset; int not; int op; }; static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; } static inline bool is_function_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_TRACE_FN; } extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); extern int apply_event_filter(struct trace_event_file *file, char *filter_string); extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); extern int create_event_filter(struct trace_array *tr, struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); extern struct trace_event_file *__find_event_file(struct trace_array *tr, const char *system, const char *event); extern struct trace_event_file *find_event_file(struct trace_array *tr, const char *system, const char *event); static inline void *event_file_data(struct file *filp) { return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); extern int register_trigger_hist_enable_disable_cmds(void); #else static inline int register_trigger_hist_cmd(void) { return 0; } static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; } #endif extern int register_trigger_cmds(void); extern void clear_event_triggers(struct trace_array *tr); struct event_trigger_data { unsigned long count; int ref; struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; void *private_data; bool paused; bool paused_tmp; struct list_head list; char *name; struct list_head named_list; struct event_trigger_data *named_data; }; /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" #define ENABLE_HIST_STR "enable_hist" #define DISABLE_HIST_STR "disable_hist" struct enable_trigger_data { struct trace_event_file *file; bool enable; bool hist; }; extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int event_enable_trigger_func(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable); extern void update_cond_flag(struct trace_event_file *file); extern int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, struct trace_event_file *file); extern struct event_trigger_data *find_named_trigger(const char *name); extern bool is_named_trigger(struct event_trigger_data *test); extern int save_named_trigger(const char *name, struct event_trigger_data *data); extern void del_named_trigger(struct event_trigger_data *data); extern void pause_named_trigger(struct event_trigger_data *data); extern void unpause_named_trigger(struct event_trigger_data *data); extern void set_named_trigger_data(struct event_trigger_data *data, struct event_trigger_data *named_data); extern struct event_trigger_data * get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); /** * struct event_trigger_ops - callbacks for trace event triggers * * The methods in this structure provide per-event trigger hooks for * various trigger operations. * * All the methods below, except for @init() and @free(), must be * implemented. * * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() * function). This can be used to perform per-trigger * initialization such as incrementing a per-trigger reference * count, for instance. This is usually implemented by the * generic utility function @event_trigger_init() (see * trace_event_triggers.c). * * @free: An optional de-initialization function called for the * trigger when the trigger is unregistered (via the * event_command @reg() function). This can be used to perform * per-trigger de-initialization such as decrementing a * per-trigger reference count and freeing corresponding trigger * data, for instance. This is usually implemented by the * generic utility function @event_trigger_free() (see * trace_event_triggers.c). * * @print: The callback function invoked to have the trigger print * itself. This is usually implemented by a wrapper function * that calls the generic utility function @event_trigger_print() * (see trace_event_triggers.c). */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, void *rec, struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, struct event_trigger_data *data); int (*print)(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); }; /** * struct event_command - callbacks and data members for event commands * * Event commands are invoked by users by writing the command name * into the 'trigger' file associated with a trace event. The * parameters associated with a specific invocation of an event * command are used to create an event trigger instance, which is * added to the list of trigger instances associated with that trace * event. When the event is hit, the set of triggers associated with * that event is invoked. * * The data members in this structure provide per-event command data * for various event commands. * * All the data members below, except for @post_trigger, must be set * for each event command. * * @name: The unique name that identifies the event command. This is * the name used when setting triggers via trigger files. * * @trigger_type: A unique id that identifies the event command * 'type'. This value has two purposes, the first to ensure that * only one trigger of the same type can be set at a given time * for a particular event e.g. it doesn't make sense to have both * a traceon and traceoff trigger attached to a single event at * the same time, so traceon and traceoff have the same type * though they have different names. The @trigger_type value is * also used as a bit value for deferring the actual trigger * action until after the current event is finished. Some * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type * enum in include/linux/trace_events.h. * * @flags: See the enum event_command_flags below. * * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * * @func: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other * event_command callback functions to orchestrate this, and is * usually implemented by the generic utility function * @event_trigger_callback() (see trace_event_triggers.c). * * @reg: Adds the trigger to the list of triggers associated with the * event, and enables the event trigger itself, after * initializing it (via the event_trigger_ops @init() function). * This is also where commands can use the @trigger_type value to * make the decision as to whether or not multiple instances of * the trigger should be allowed. This is usually implemented by * the generic utility function @register_trigger() (see * trace_event_triggers.c). * * @unreg: Removes the trigger from the list of triggers associated * with the event, and disables the event trigger itself, after * initializing it (via the event_trigger_ops @free() function). * This is usually implemented by the generic utility function * @unregister_trigger() (see trace_event_triggers.c). * * @unreg_all: An optional function called to remove all the triggers * from the list of triggers associated with the event. Called * when a trigger file is opened in truncate mode. * * @set_filter: An optional function called to parse and set a filter * for the trigger. If no @set_filter() method is set for the * event command, filters set by the user for the command will be * ignored. This is usually implemented by the generic utility * function @set_trigger_filter() (see trace_event_triggers.c). * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; int (*func)(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** * enum event_command_flags - flags for struct event_command * * @POST_TRIGGER: A flag that says whether or not this command needs * to have its action delayed until after the current event has * been closed. Some triggers need to avoid being invoked while * an event is currently in the process of being logged, since * the trigger may itself log data into the trace buffer. Thus * we make sure the current event is committed before invoking * those triggers. To do that, the trigger invocation is split * in two - the first part checks the filter using the current * trace record; if a command has the @post_trigger flag set, it * sets a bit for itself in the return value, otherwise it * directly invokes the trigger. Once all commands have been * either invoked or set their return flag, the current record is * either committed or discarded. At that point, if any commands * have deferred their triggers, those commands are finally * invoked following the close of the current event. In other * words, if the event_trigger_ops @func() probe implementation * itself logs to the trace buffer, this flag should be set, * otherwise it can be left unspecified. * * @NEEDS_REC: A flag that says whether or not this command needs * access to the trace record in order to perform its function, * regardless of whether or not it has a filter associated with * it (filters make a trigger require access to the trace record * but are not always present). */ enum event_command_flags { EVENT_CMD_FL_POST_TRIGGER = 1, EVENT_CMD_FL_NEEDS_REC = 2, }; static inline bool event_command_post_trigger(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER; } static inline bool event_command_needs_rec(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC; } extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data); extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update); extern int tracing_snapshot_cond_disable(struct trace_array *tr); extern void *tracing_cond_snapshot_data(struct trace_array *tr); extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; extern const char *__start___tracepoint_str[]; extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); /* Used from boot time tracer */ extern int trace_set_options(struct trace_array *tr, char *option); extern int tracing_set_tracer(struct trace_array *tr, const char *buf); extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); extern int tracing_set_cpumask(struct trace_array *tr, cpumask_var_t tracing_cpumask_new); #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats * into sections to display. But the trace infrastructure wants * to use these without the added overhead at the price of being * a bit slower (used mainly for warnings, where we don't care * about performance). The internal_trace_puts() is for such * a purpose. */ #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif #ifdef CONFIG_FTRACE_SYSCALLS void init_ftrace_syscalls(void); const char *get_syscall_name(int syscall); #else static inline void init_ftrace_syscalls(void) { } static inline const char *get_syscall_name(int syscall) { return NULL; } #endif #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT void tracing_snapshot_instance(struct trace_array *tr); int tracing_alloc_snapshot_instance(struct trace_array *tr); #else static inline void tracing_snapshot_instance(struct trace_array *tr) { } static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) { return 0; } #endif #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1); void tracer_preempt_off(unsigned long a0, unsigned long a1); #else static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_IRQSOFF_TRACER void tracer_hardirqs_on(unsigned long a0, unsigned long a1); void tracer_hardirqs_off(unsigned long a0, unsigned long a1); #else static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif extern struct trace_iterator *tracepoint_print_iter; /* * Reset the state of the trace_iterator so that it can read consumed data. * Normally, the trace_iterator is used for reading the data when it is not * consumed, and must retain state. */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { const size_t offset = offsetof(struct trace_iterator, seq); /* * Keep gcc from complaining about overwriting more than just one * member in the structure. */ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); iter->pos = -1; } /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') return false; while (*++name != '\0') { if (!isalpha(*name) && !isdigit(*name) && *name != '_') return false; } return true; } #endif /* _LINUX_KERNEL_TRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PAGEMAP_H #define _LINUX_PAGEMAP_H /* * Copyright 1995 Linus Torvalds */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/list.h> #include <linux/highmem.h> #include <linux/compiler.h> #include <linux/uaccess.h> #include <linux/gfp.h> #include <linux/bitops.h> #include <linux/hardirq.h> /* for in_interrupt() */ #include <linux/hugetlb_inline.h> struct pagevec; /* * Bits in mapping->flags. */ enum mapping_flags { AS_EIO = 0, /* IO error on async write */ AS_ENOSPC = 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */ AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, AS_THP_SUPPORT = 6, /* THPs supported */ }; /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set * @error: the error to set in the mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor * to report errors on any file that was open at the time of the error. Some * internal callers also need to know when writeback errors have occurred. * * When a writeback error occurs, most filesystems will want to call * mapping_set_error to record the error in the mapping so that it can be * reported when the application calls fsync(2). */ static inline void mapping_set_error(struct address_space *mapping, int error) { if (likely(!error)) return; /* Record in wb_err for checkers using errseq_t based tracking */ __filemap_set_wb_err(mapping, error); /* Record it in superblock */ if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, error); /* Record it in flags for now, for legacy callers */ if (error == -ENOSPC) set_bit(AS_ENOSPC, &mapping->flags); else set_bit(AS_EIO, &mapping->flags); } static inline void mapping_set_unevictable(struct address_space *mapping) { set_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_clear_unevictable(struct address_space *mapping) { clear_bit(AS_UNEVICTABLE, &mapping->flags); } static i