/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP protocol.
 *
 * Version:	@(#)tcp.h	1.0.2	04/28/93
 *
 * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 */
#ifndef _LINUX_TCP_H
#define _LINUX_TCP_H

#include <linux/skbuff.h>
#include <linux/win_minmax.h>
#include <net/sock.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <uapi/linux/tcp.h>

static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
{
	return (struct tcphdr *)skb_transport_header(skb);
}

static inline unsigned int __tcp_hdrlen(const struct tcphdr *th)
{
	return th->doff * 4;
}

static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
{
	return __tcp_hdrlen(tcp_hdr(skb));
}

static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
{
	return (struct tcphdr *)skb_inner_transport_header(skb);
}

static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb)
{
	return inner_tcp_hdr(skb)->doff * 4;
}

static inline unsigned int tcp_optlen(const struct sk_buff *skb)
{
	return (tcp_hdr(skb)->doff - 5) * 4;
}

/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN		4	/* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX		16	/* Max Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_SIZE	8	/* the size employed by this impl.
*/ /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { __le64 val[DIV_ROUND_UP(TCP_FASTOPEN_COOKIE_MAX, sizeof(u64))]; s8 len; bool exp; /* In RFC6994 experimental option format */ }; /* This defines a selective acknowledgement block. */ struct tcp_sack_block_wire { __be32 start_seq; __be32 end_seq; }; struct tcp_sack_block { u32 start_seq; u32 end_seq; }; /*These are used to set the sack_ok field in struct tcp_options_received */ #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ struct tcp_options_received { /* PAWS/RTTM data */ int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ u32 ts_recent; /* Time stamp to echo next */ u32 rcv_tsval; /* Time stamp value */ u32 rcv_tsecr; /* Time stamp echo reply */ u16 saw_tstamp : 1, /* Saw TIMESTAMP on last packet */ tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */ dsack : 1, /* D-SACK is scheduled */ wscale_ok : 1, /* Wscale seen on SYN packet */ sack_ok : 3, /* SACK seen on SYN packet */ smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ u8 saw_unknown:1, /* Received unknown option */ unused:7; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; #if IS_ENABLED(CONFIG_SMC) rx_opt->smc_ok = 0; #endif } /* This is the max number of SACKS that we'll generate and process. It's safe * to increase this, although since: * size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8) * only four options will fit in a standard TCP header */ #define TCP_NUM_SACKS 4 struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif u32 txhash; u32 rcv_isn; u32 snt_isn; u32 ts_off; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# * after data-in-SYN. */ u8 syn_tos; }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) { return (struct tcp_request_sock *)req; } struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ /* * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ __be32 pred_flags; /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. */ u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes * were acked. */ u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn * total number of data segments in. 
*/ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut * total number of data segments sent. */ u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut * total number of data bytes sent. */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. */ u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ u32 mss_cache; /* Cached effective mss, not including SACKS */ u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ u32 end_seq; /* Ending TCP sequence of the skb */ u32 last_delivered; /* tp->delivered at last reo_wnd adj */ u8 reo_wnd_steps; /* Allowed reordering window */ #define TCP_RACK_RECOVERY_THRESH 16 u8 reo_wnd_persist:5, /* No. of recovery since last adj */ dsack_seen:1, /* Whether DSACK seen after last adj */ advanced:1; /* mstamp advanced since last lost marking */ } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ unused:5; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ fastopen_client_fail:2; /* reason why fastopen failed */ u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 save_syn:2, /* Save headers of SYN packet */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? 
*/ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ u32 max_packets_seq; /* right edge of max_packets_out flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reordering; /* Packet reordering metric. */ u32 reord_seen; /* number of data packet reordering events */ u32 snd_up; /* Urgent pointer */ /* * Options received (usually on last packet, some only on SYN packets). */ struct tcp_options_received rx_opt; /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ u32 snd_ssthresh; /* Slow start size threshold */ u32 snd_cwnd; /* Sending congestion window */ u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 lost; /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp; /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u32 rate_delivered; /* saved rate sample: packets delivered */ u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ struct tcp_sack_block recv_sack_cache[4]; struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set * (validity guaranteed only if * sacked_out > 0) */ int lost_cnt_hint; u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ u32 retrans_stamp; /* Timestamp of the last retransmit, * also used in SYN-SENT to remember stamp of * the first SYN. 
*/ u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; /* Sock_ops bpf program related variables */ #ifdef CONFIG_BPF u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs * values defined in uapi/linux/tcp.h */ #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG) #else #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 #endif u16 timeout_rehash; /* Timeout-triggered rehash attempts */ u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ /* Receiver side RTT estimation */ u32 rcv_rtt_last_tsecr; struct { u32 rtt_us; u32 seq; u64 time; } rcv_rtt_est; /* Receiver queue space */ struct { u32 space; u32 seq; u64 time; } rcvq_space; /* TCP-specific MTU probe information. */ struct { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG * while socket was owned by user. */ #if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) bool syn_smc; /* SYN includes SMC */ #endif #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big * socket. Used to retransmit SYNACKs etc. 
*/ struct request_sock __rcu *fastopen_rsk; struct saved_syn *saved_syn; }; enum tsq_enum { TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ }; enum tsq_flags { TSQF_THROTTLED = (1UL << TSQ_THROTTLED), TSQF_QUEUED = (1UL << TSQ_QUEUED), TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; } struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; #define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt #define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; /* The time we sent the last out-of-window ACK: */ u32 tw_last_oow_ack_time; int tw_ts_recent_stamp; u32 tw_tx_delay; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) { return (struct tcp_timewait_sock *)sk; } static inline bool tcp_passive_fastopen(const struct sock *sk) { return sk->sk_state == TCP_SYN_RECV && rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL; } static inline void fastopen_queue_tune(struct sock *sk, int backlog) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } static inline void tcp_move_syn(struct tcp_sock *tp, struct request_sock *req) { tp->saved_syn = req->saved_syn; req->saved_syn = NULL; } static inline void tcp_saved_syn_free(struct tcp_sock *tp) { kfree(tp->saved_syn); tp->saved_syn = NULL; } static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) { return saved_syn->mac_hdrlen + saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, const struct sk_buff *orig_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { /* We use READ_ONCE() here because socket might not be locked. * This happens for listeners. */ u16 user_mss = READ_ONCE(tp->rx_opt.user_mss); return (user_mss && user_mss < mss) ? user_mss : mss; } int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepcnt(struct sock *sk, int val); int tcp_sock_set_keepidle_locked(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); void tcp_sock_set_user_timeout(struct sock *sk, u32 val); #endif /* _LINUX_TCP_H */
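/*
 * Illustration (not part of tcp.h): the tcp_hdr()/tcp_optlen() accessors
 * above are how other kernel code typically locates the TCP options area
 * in an skb. A minimal sketch, assuming the transport header offset has
 * already been set by the input path; the helper name is made up.
 */
static const u8 *skb_tcp_options(const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);

	if (tcp_optlen(skb) == 0)
		return NULL;	/* header is the bare 20 bytes, no options */

	/* Options start immediately after the fixed-size header. */
	return (const u8 *)(th + 1);
}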
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 */
#ifndef _ASM_X86_STACKTRACE_H
#define _ASM_X86_STACKTRACE_H

#include <linux/uaccess.h>
#include <linux/ptrace.h>

#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>

enum stack_type {
	STACK_TYPE_UNKNOWN,
	STACK_TYPE_TASK,
	STACK_TYPE_IRQ,
	STACK_TYPE_SOFTIRQ,
	STACK_TYPE_ENTRY,
	STACK_TYPE_EXCEPTION,
	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
};

struct stack_info {
	enum stack_type type;
	unsigned long *begin, *end, *next_sp;
};

bool in_task_stack(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info);

bool in_entry_stack(unsigned long *stack, struct stack_info *info);

int get_stack_info(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info, unsigned long *visit_mask);
bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
			    struct stack_info *info);

const char *stack_type_name(enum stack_type type);

static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{
	void *begin = info->begin;
	void *end = info->end;

	return (info->type != STACK_TYPE_UNKNOWN &&
		addr >= begin && addr < end &&
		addr + len > begin && addr + len <= end);
}

#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#else
#define STACKSLOTS_PER_LINE 4
#endif

#ifdef CONFIG_FRAME_POINTER
static inline unsigned long *
get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
	if (regs)
		return (unsigned long *)regs->bp;

	if (task == current)
		return __builtin_frame_address(0);

	return &((struct inactive_task_frame *)task->thread.sp)->bp;
}
#else
static inline unsigned long *
get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
	return NULL;
}
#endif /* CONFIG_FRAME_POINTER */

static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
	if (regs)
		return (unsigned long *)regs->sp;

	if (task == current)
		return __builtin_frame_address(0);

	return (unsigned long *)task->thread.sp;
}

void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, const char *log_lvl);

/* The form of the top of the frame on the stack */
struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

struct stack_frame_ia32 {
	u32 next_frame;
	u32 return_address;
};

void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
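/*
 * Illustration (not part of this header): a hedged sketch of how a caller
 * such as the x86 unwinders might combine get_stack_pointer(),
 * get_stack_info() and on_stack() to validate an address before
 * dereferencing it. The helper name is hypothetical.
 */
static bool addr_on_task_stack(struct task_struct *task, void *addr)
{
	struct stack_info info;
	unsigned long *sp = get_stack_pointer(task, NULL);

	/* get_stack_info() returns 0 when the stack could be identified. */
	if (get_stack_info(sp, task, &info, NULL))
		return false;

	return on_stack(&info, addr, sizeof(long));
}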
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SWAP_H
#define _LINUX_SWAP_H

#include <linux/spinlock.h>
#include <linux/linkage.h>
#include <linux/mmzone.h>
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
#include <linux/node.h>
#include <linux/fs.h>
#include <linux/atomic.h>
#include <linux/page-flags.h>
#include <asm/page.h>

struct notifier_block;

struct bio;

struct pagevec;

#define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK	0x7fff
#define SWAP_FLAG_PRIO_SHIFT	0
#define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
#define SWAP_FLAG_DISCARD_ONCE	0x20000 /* discard swap area at swapon-time */
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */

#define
SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) #define SWAP_BATCH 64 static inline int current_is_kswapd(void) { return current->flags & PF_KSWAPD; } /* * MAX_SWAPFILES defines the maximum number of swaptypes: things which can * be swapped to. The swap type and the offset into that swap type are * encoded into pte's and into pgoff_t's in the swapcache. Using five bits * for the type means that the maximum number of swapcache pages is 27 bits * on 32-bit-pgoff_t architectures. And that assumes that the architecture packs * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 /* * Use some of the swap files numbers for other purposes. This * is a convenient way to hook into the VM to trigger special * actions on faults. */ /* * Unaddressable device memory support. See include/linux/hmm.h and * Documentation/vm/hmm.rst. Short description is we need struct pages for * device memory that is unaddressable (inaccessible) by CPU, so that we can * migrate part of a process memory to device memory. * * When a page is migrated from CPU to device, we set the CPU page table entry * to a special SWP_DEVICE_* entry. */ #ifdef CONFIG_DEVICE_PRIVATE #define SWP_DEVICE_NUM 2 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) #else #define SWP_DEVICE_NUM 0 #endif /* * NUMA node memory migration support */ #ifdef CONFIG_MIGRATION #define SWP_MIGRATION_NUM 2 #define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else #define SWP_MIGRATION_NUM 0 #endif /* * Handling of hardware poisoned pages with memory corruption. */ #ifdef CONFIG_MEMORY_FAILURE #define SWP_HWPOISON_NUM 1 #define SWP_HWPOISON MAX_SWAPFILES #else #define SWP_HWPOISON_NUM 0 #endif #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the * old reserved area - some extra information. Note that the first * kilobyte is reserved for boot loader or disk label stuff... * * Having the magic at the end of the PAGE_SIZE makes detecting swap * areas somewhat tricky on machines that support multiple page sizes. * For 2.5 we'll probably want to move the magic to just beyond the * bootbits... */ union swap_header { struct { char reserved[PAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ } magic; struct { char bootbits[1024]; /* Space for disklabel etc. */ __u32 version; __u32 last_page; __u32 nr_badpages; unsigned char sws_uuid[16]; unsigned char sws_volume[16]; __u32 padding[117]; __u32 badpages[1]; } info; }; /* * current->reclaim_state points to one of these when a task is running * memory reclaim */ struct reclaim_state { unsigned long reclaimed_slab; }; #ifdef __KERNEL__ struct address_space; struct sysinfo; struct writeback_control; struct zone; /* * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of * disk blocks. A list of swap extents maps the entire swapfile. (Where the * term `swapfile' refers to either a blockdevice or an IS_REG file. Apart * from setup, they're handled identically. * * We always assume that blocks are of size PAGE_SIZE. 
*/ struct swap_extent { struct rb_node rb_node; pgoff_t start_page; pgoff_t nr_pages; sector_t start_block; }; /* * Max bad pages in the new format.. */ #define MAX_SWAP_BADPAGES \ ((offsetof(union swap_header, magic.magic) - \ offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ SWP_VALID = (1 << 13), /* swap is valid to be operated on? */ /* add others here before... */ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ #define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ /* Special value in first swap_map */ #define SWAP_MAP_MAX 0x3e /* Max count */ #define SWAP_MAP_BAD 0x3f /* Note page is bad */ #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */ /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ /* * We use this to track usage of a cluster. A cluster is a block of swap disk * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * * The data field stores next cluster if the cluster is free or cluster usage * counter otherwise. The flags field determines if a cluster is free. This is * protected by swap_info_struct.lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields * and swap_info_struct->swap_map * elements correspond to the swap * cluster */ unsigned int data:24; unsigned int flags:8; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ #define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from * its own cluster and swapout sequentially. The purpose is to optimize swapout * throughput. */ struct percpu_cluster { struct swap_cluster_info index; /* Current cluster index */ unsigned int next; /* Likely next allocation offset */ }; struct swap_cluster_list { struct swap_cluster_info head; struct swap_cluster_info tail; }; /* * The in-memory structure used to track swap areas. */ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. 
Only for SSD */ struct swap_cluster_list free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ #ifdef CONFIG_FRONTSWAP unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ atomic_t frontswap_pages; /* frontswap pages in-use counter */ #endif spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ spinlock_t cont_lock; /* * protect swap count continuation page * list. */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the * array is nr_node_ids, which is not * a fixed value so have to allocate * dynamically. * And it has to be an array so that * plist_for_each_* can work. 
*/ }; #ifdef CONFIG_64BIT #define SWAP_RA_ORDER_CEILING 5 #else /* Avoid stack overflow, because we need to save part of page table */ #define SWAP_RA_ORDER_CEILING 3 #define SWAP_RA_PTE_CACHE_SIZE (1 << SWAP_RA_ORDER_CEILING) #endif struct vma_swap_readahead { unsigned short win; unsigned short offset; unsigned short nr_pte; #ifdef CONFIG_64BIT pte_t *ptes; #else pte_t ptes[SWAP_RA_PTE_CACHE_SIZE]; #endif }; /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); #define mapping_set_update(xas, mapping) do { \ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ xas_set_update(xas, workingset_update_node); \ } while (0) /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; extern unsigned long nr_free_buffer_pages(void); /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); extern unsigned long reclaim_pages(struct list_head *page_list); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; #else #define node_reclaim_mode 0 #endif extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP #include <linux/blk_types.h> /* for bio_end_io_t */ /* linux/mm/page_io.c */ extern int swap_readpage(struct page *page, bool do_poll); extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func); extern int 
swap_set_page_dirty(struct page *page); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ /* One swap address space for each 64M swap space */ #define SWAP_ADDRESS_SPACE_SHIFT 14 #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) extern struct address_space *swapper_spaces[]; #define swap_address_space(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); extern void *get_shadow_from_swap_cache(swp_entry_t entry); extern int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp, void **shadowp); extern void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow); extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool do_poll); extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. 
*/ static inline bool vm_swap_full(void) { return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages; } static inline long get_nr_swap_pages(void) { return atomic_long_read(&nr_swap_pages); } extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(struct page *page); extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); extern int __swap_count(swp_entry_t entry); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { rcu_read_unlock(); } #else /* CONFIG_SWAP */ static inline int swap_readpage(struct page *page, bool do_poll) { return 0; } static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return NULL; } #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL #define vm_swap_full() 0 #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file * so leave put_page and release_pages undeclared... 
*/ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); static inline void show_swap_cache_info(void) { } #define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) #define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { return 0; } static inline void swap_shmem_alloc(swp_entry_t swp) { } static inline int swap_duplicate(swp_entry_t swp) { return 0; } static inline void swap_free(swp_entry_t swp) { } static inline void put_swap_page(struct page *page, swp_entry_t swp) { } static inline struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } static inline int swap_writepage(struct page *p, struct writeback_control *wbc) { return 0; } static inline struct page *lookup_swap_cache(swp_entry_t swp, struct vm_area_struct *vma, unsigned long addr) { return NULL; } static inline struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) { return find_get_page(mapping, index); } static inline int add_to_swap(struct page *page) { return 0; } static inline void *get_shadow_from_swap_cache(swp_entry_t entry) { return NULL; } static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask, void **shadowp) { return -1; } static inline void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow) { } static inline void delete_from_swap_cache(struct page *page) { } static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end) { } static inline int page_swapcount(struct page *page) { return 0; } static inline int __swap_count(swp_entry_t entry) { return 0; } static inline int __swp_swapcount(swp_entry_t entry) { return 0; } static inline int swp_swapcount(swp_entry_t entry) { return 0; } #define reuse_swap_page(page, total_map_swapcount) \ (page_trans_huge_mapcount(page, total_map_swapcount) == 1) static inline int try_to_free_swap(struct page *page) { return 0; } static inline swp_entry_t get_swap_page(struct page *page) { swp_entry_t entry; entry.val = 0; return entry; } #endif /* CONFIG_SWAP */ #ifdef CONFIG_THP_SWAP extern int split_swap_cluster(swp_entry_t entry); #else static inline int split_swap_cluster(swp_entry_t entry) { return 0; } #endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* Cgroup2 doesn't have per-cgroup swappiness */ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return vm_swappiness; /* root ? 
*/ if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) return vm_swappiness; return memcg->swappiness; } #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { return vm_swappiness; } #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); #else static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) { } #endif #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { } static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { return 0; } static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { } static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { return get_nr_swap_pages(); } static inline bool mem_cgroup_swap_full(struct page *page) { return vm_swap_full(); } #endif #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */
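/*
 * Illustration (not part of swap.h): a minimal sketch using the accessors
 * declared above. The helper below is hypothetical; with CONFIG_SWAP=n,
 * get_nr_swap_pages() and total_swap_pages both evaluate to 0, so it simply
 * reports 0.
 */
static inline unsigned int swap_used_percent(void)
{
	long free_pages = get_nr_swap_pages();
	long total = total_swap_pages;

	if (total <= 0)
		return 0;

	/* pages in use = total - free, scaled to a percentage */
	return (unsigned int)(((total - free_pages) * 100) / total);
}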
// SPDX-License-Identifier: GPL-2.0+
/*
 * XArray implementation
 * Copyright (c) 2017-2018 Microsoft Corporation
 * Copyright (c) 2018-2020 Oracle
 * Author: Matthew Wilcox <willy@infradead.org>
 */

#include
<linux/bitmap.h> #include <linux/export.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/xarray.h> /* * Coding conventions in this file: * * @xa is used to refer to the entire xarray. * @xas is the 'xarray operation state'. It may be either a pointer to * an xa_state, or an xa_state stored on the stack. This is an unfortunate * ambiguity. * @index is the index of the entry being operated on * @mark is an xa_mark_t; a small number indicating one of the mark bits. * @node refers to an xa_node; usually the primary one being operated on by * this function. * @offset is the index into the slots array inside an xa_node. * @parent refers to the @xa_node closer to the head than @node. * @entry refers to something stored in a slot in the xarray */ static inline unsigned int xa_lock_type(const struct xarray *xa) { return (__force unsigned int)xa->xa_flags & 3; } static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_lock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_lock_bh(xas); else xas_lock(xas); } static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_unlock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_unlock_bh(xas); else xas_unlock(xas); } static inline bool xa_track_free(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_TRACK_FREE; } static inline bool xa_zero_busy(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_ZERO_BUSY; } static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) xa->xa_flags |= XA_FLAGS_MARK(mark); } static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) { if (xa->xa_flags & XA_FLAGS_MARK(mark)) xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); } static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) { return node->marks[(__force unsigned)mark]; } static inline bool node_get_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return test_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_set_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return __test_and_set_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return __test_and_clear_bit(offset, node_marks(node, mark)); } static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) { return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) { bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); } #define mark_inc(mark) do { \ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ } while (0) /* * xas_squash_marks() - Merge all marks to the first entry * @xas: Array operation state. * * Set a mark on the first entry if any entry has it set. Clear marks on * all sibling entries. 
*/ static void xas_squash_marks(const struct xa_state *xas) { unsigned int mark = 0; unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; if (!xas->xa_sibs) return; do { unsigned long *marks = xas->xa_node->marks[mark]; if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) continue; __set_bit(xas->xa_offset, marks); bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); } while (mark++ != (__force unsigned)XA_MARK_MAX); } /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { return (index >> node->shift) & XA_CHUNK_MASK; } static void xas_set_offset(struct xa_state *xas) { xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); } /* move the index either forwards (find) or backwards (sibling slot) */ static void xas_move_index(struct xa_state *xas, unsigned long offset) { unsigned int shift = xas->xa_node->shift; xas->xa_index &= ~XA_CHUNK_MASK << shift; xas->xa_index += offset << shift; } static void xas_advance(struct xa_state *xas) { xas->xa_offset++; xas_move_index(xas, xas->xa_offset); } static void *set_bounds(struct xa_state *xas) { xas->xa_node = XAS_BOUNDS; return NULL; } /* * Starts a walk. If the @xas is already valid, we assume that it's on * the right path and just return where we've got to. If we're in an * error state, return NULL. If the index is outside the current scope * of the xarray, return NULL without changing @xas->xa_node. Otherwise * set @xas->xa_node to NULL and return the current head of the array. */ static void *xas_start(struct xa_state *xas) { void *entry; if (xas_valid(xas)) return xas_reload(xas); if (xas_error(xas)) return NULL; entry = xa_head(xas->xa); if (!xa_is_node(entry)) { if (xas->xa_index) return set_bounds(xas); } else { if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) return set_bounds(xas); } xas->xa_node = NULL; return entry; } static void *xas_descend(struct xa_state *xas, struct xa_node *node) { unsigned int offset = get_offset(xas->xa_index, node); void *entry = xa_entry(xas->xa, node, offset); xas->xa_node = node; if (xa_is_sibling(entry)) { offset = xa_to_sibling(entry); entry = xa_entry(xas->xa, node, offset); } xas->xa_offset = offset; return entry; } /** * xas_load() - Load an entry from the XArray (advanced). * @xas: XArray operation state. * * Usually walks the @xas to the appropriate state to load the entry * stored at xa_index. However, it will do nothing and return %NULL if * @xas is in an error state. xas_load() will never expand the tree. * * If the xa_state is set up to operate on a multi-index entry, xas_load() * may return %NULL or an internal entry, even if there are entries * present within the range specified by @xas. * * Context: Any context. The caller should hold the xa_lock or the RCU lock. * Return: Usually an entry in the XArray, but see description for exceptions. 
*/ void *xas_load(struct xa_state *xas) { void *entry = xas_start(xas); while (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); if (xas->xa_shift > node->shift) break; entry = xas_descend(xas, node); if (node->shift == 0) break; } return entry; } EXPORT_SYMBOL_GPL(xas_load); /* Move the radix tree node cache here */ extern struct kmem_cache *radix_tree_node_cachep; extern void radix_tree_node_rcu_free(struct rcu_head *head); #define XA_RCU_FREE ((struct xarray *)1) static void xa_node_free(struct xa_node *node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); node->array = XA_RCU_FREE; call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* * xas_destroy() - Free any resources allocated during the XArray operation. * @xas: XArray operation state. * * This function is now internal-only. */ static void xas_destroy(struct xa_state *xas) { struct xa_node *next, *node = xas->xa_alloc; while (node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); next = rcu_dereference_raw(node->parent); radix_tree_node_rcu_free(&node->rcu_head); xas->xa_alloc = node = next; } } /** * xas_nomem() - Allocate memory if needed. * @xas: XArray operation state. * @gfp: Memory allocation flags. * * If we need to add new nodes to the XArray, we try to allocate memory * with GFP_NOWAIT while holding the lock, which will usually succeed. * If it fails, @xas is flagged as needing memory to continue. The caller * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, * the caller should retry the operation. * * Forward progress is guaranteed as one node is allocated here and * stored in the xa_state where it will be found by xas_alloc(). More * nodes will likely be found in the slab allocator, but we do not tie * them up here. * * Return: true if memory was needed, and was successfully allocated. */ bool xas_nomem(struct xa_state *xas, gfp_t gfp) { if (xas->xa_node != XA_ERROR(-ENOMEM)) { xas_destroy(xas); return false; } if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; } EXPORT_SYMBOL_GPL(xas_nomem); /* * __xas_nomem() - Drop locks and allocate memory if needed. * @xas: XArray operation state. * @gfp: Memory allocation flags. * * Internal variant of xas_nomem(). * * Return: true if memory was needed, and was successfully allocated. 
*/ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) __must_hold(xas->xa->xa_lock) { unsigned int lock_type = xa_lock_type(xas->xa); if (xas->xa_node != XA_ERROR(-ENOMEM)) { xas_destroy(xas); return false; } if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); xas_lock_type(xas, lock_type); } else { xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); } if (!xas->xa_alloc) return false; xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; } static void xas_update(struct xa_state *xas, struct xa_node *node) { if (xas->xa_update) xas->xa_update(node); else XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); } static void *xas_alloc(struct xa_state *xas, unsigned int shift) { struct xa_node *parent = xas->xa_node; struct xa_node *node = xas->xa_alloc; if (xas_invalid(xas)) return NULL; if (node) { xas->xa_alloc = NULL; } else { gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; node = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; } } if (parent) { node->offset = xas->xa_offset; parent->count++; XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE); xas_update(xas, parent); } XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); node->shift = shift; node->count = 0; node->nr_values = 0; RCU_INIT_POINTER(node->parent, xas->xa_node); node->array = xas->xa; return node; } #ifdef CONFIG_XARRAY_MULTI /* Returns the number of indices covered by a given xa_state */ static unsigned long xas_size(const struct xa_state *xas) { return (xas->xa_sibs + 1UL) << xas->xa_shift; } #endif /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a * multi-index entry at index 0, the calculation is a little more complex * than you might expect. */ static unsigned long xas_max(struct xa_state *xas) { unsigned long max = xas->xa_index; #ifdef CONFIG_XARRAY_MULTI if (xas->xa_shift || xas->xa_sibs) { unsigned long mask = xas_size(xas) - 1; max |= mask; if (mask == max) max++; } #endif return max; } /* The maximum index that can be contained in the array without expanding it */ static unsigned long max_index(void *entry) { if (!xa_is_node(entry)) return 0; return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; } static void xas_shrink(struct xa_state *xas) { struct xarray *xa = xas->xa; struct xa_node *node = xas->xa_node; for (;;) { void *entry; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); if (node->count != 1) break; entry = xa_entry_locked(xa, node, 0); if (!entry) break; if (!xa_is_node(entry) && node->shift) break; if (xa_is_zero(entry) && xa_zero_busy(xa)) entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK)) xa_mark_clear(xa, XA_FREE_MARK); node->count = 0; node->nr_values = 0; if (!xa_is_node(entry)) RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY); xas_update(xas, node); xa_node_free(node); if (!xa_is_node(entry)) break; node = xa_to_node(entry); node->parent = NULL; } } /* * xas_delete_node() - Attempt to delete an xa_node * @xas: Array operation state. * * Attempts to delete the @xas->xa_node. 
This will fail if xa->node has * a non-zero reference count. */ static void xas_delete_node(struct xa_state *xas) { struct xa_node *node = xas->xa_node; for (;;) { struct xa_node *parent; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); if (node->count) break; parent = xa_parent_locked(xas->xa, node); xas->xa_node = parent; xas->xa_offset = node->offset; xa_node_free(node); if (!parent) { xas->xa->xa_head = NULL; xas->xa_node = XAS_BOUNDS; return; } parent->slots[xas->xa_offset] = NULL; parent->count--; XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE); node = parent; xas_update(xas, node); } if (!node->parent) xas_shrink(xas); } /** * xas_free_nodes() - Free this node and all nodes that it references * @xas: Array operation state. * @top: Node to free * * This node has been removed from the tree. We must now free it and all * of its subnodes. There may be RCU walkers with references into the tree, * so we must replace all entries with retry markers. */ static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) { unsigned int offset = 0; struct xa_node *node = top; for (;;) { void *entry = xa_entry_locked(xas->xa, node, offset); if (node->shift && xa_is_node(entry)) { node = xa_to_node(entry); offset = 0; continue; } if (entry) RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY); offset++; while (offset == XA_CHUNK_SIZE) { struct xa_node *parent; parent = xa_parent_locked(xas->xa, node); offset = node->offset + 1; node->count = 0; node->nr_values = 0; xas_update(xas, node); xa_node_free(node); if (node == top) return; node = parent; } } } /* * xas_expand adds nodes to the head of the tree until it has reached * sufficient height to be able to contain @xas->xa_index */ static int xas_expand(struct xa_state *xas, void *head) { struct xarray *xa = xas->xa; struct xa_node *node = NULL; unsigned int shift = 0; unsigned long max = xas_max(xas); if (!head) { if (max == 0) return 0; while ((max >> shift) >= XA_CHUNK_SIZE) shift += XA_CHUNK_SHIFT; return shift + XA_CHUNK_SHIFT; } else if (xa_is_node(head)) { node = xa_to_node(head); shift = node->shift + XA_CHUNK_SHIFT; } xas->xa_node = NULL; while (max > max_index(head)) { xa_mark_t mark = 0; XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); node = xas_alloc(xas, shift); if (!node) return -ENOMEM; node->count = 1; if (xa_is_value(head)) node->nr_values = 1; RCU_INIT_POINTER(node->slots[0], head); /* Propagate the aggregated mark info to the new child */ for (;;) { if (xa_track_free(xa) && mark == XA_FREE_MARK) { node_mark_all(node, XA_FREE_MARK); if (!xa_marked(xa, XA_FREE_MARK)) { node_clear_mark(node, 0, XA_FREE_MARK); xa_mark_set(xa, XA_FREE_MARK); } } else if (xa_marked(xa, mark)) { node_set_mark(node, 0, mark); } if (mark == XA_MARK_MAX) break; mark_inc(mark); } /* * Now that the new node is fully initialised, we can add * it to the tree */ if (xa_is_node(head)) { xa_to_node(head)->offset = 0; rcu_assign_pointer(xa_to_node(head)->parent, node); } head = xa_mk_node(node); rcu_assign_pointer(xa->xa_head, head); xas_update(xas, node); shift += XA_CHUNK_SHIFT; } xas->xa_node = node; return shift; } /* * xas_create() - Create a slot to store an entry in. * @xas: XArray operation state. * @allow_root: %true if we can store the entry in the root directly * * Most users will not need to call this function directly, as it is called * by xas_store(). It is useful for doing conditional store operations * (see the xa_cmpxchg() implementation for an example). * * Return: If the slot already existed, returns the contents of this slot. 
* If the slot was newly created, returns %NULL. If it failed to create the * slot, returns %NULL and indicates the error in @xas. */ static void *xas_create(struct xa_state *xas, bool allow_root) { struct xarray *xa = xas->xa; void *entry; void __rcu **slot; struct xa_node *node = xas->xa_node; int shift; unsigned int order = xas->xa_shift; if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; if (!entry && xa_zero_busy(xa)) entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; if (!shift && !allow_root) shift = XA_CHUNK_SHIFT; entry = xa_head_locked(xa); slot = &xa->xa_head; } else if (xas_error(xas)) { return NULL; } else if (node) { unsigned int offset = xas->xa_offset; shift = node->shift; entry = xa_entry_locked(xa, node, offset); slot = &node->slots[offset]; } else { shift = 0; entry = xa_head_locked(xa); slot = &xa->xa_head; } while (shift > order) { shift -= XA_CHUNK_SHIFT; if (!entry) { node = xas_alloc(xas, shift); if (!node) break; if (xa_track_free(xa)) node_mark_all(node, XA_FREE_MARK); rcu_assign_pointer(*slot, xa_mk_node(node)); } else if (xa_is_node(entry)) { node = xa_to_node(entry); } else { break; } entry = xas_descend(xas, node); slot = &node->slots[xas->xa_offset]; } return entry; } /** * xas_create_range() - Ensure that stores to this range will succeed * @xas: XArray operation state. * * Creates all of the slots in the range covered by @xas. Sets @xas to * create single-index entries and positions it at the beginning of the * range. This is for the benefit of users which have not yet been * converted to use multi-index entries. */ void xas_create_range(struct xa_state *xas) { unsigned long index = xas->xa_index; unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; xas->xa_sibs = 0; for (;;) { xas_create(xas, true); if (xas_error(xas)) goto restore; if (xas->xa_index <= (index | XA_CHUNK_MASK)) goto success; xas->xa_index -= XA_CHUNK_SIZE; for (;;) { struct xa_node *node = xas->xa_node; xas->xa_node = xa_parent_locked(xas->xa, node); xas->xa_offset = node->offset - 1; if (node->offset != 0) break; } } restore: xas->xa_shift = shift; xas->xa_sibs = sibs; xas->xa_index = index; return; success: xas->xa_index = index; if (xas->xa_node) xas_set_offset(xas); } EXPORT_SYMBOL_GPL(xas_create_range); static void update_node(struct xa_state *xas, struct xa_node *node, int count, int values) { if (!node || (!count && !values)) return; node->count += count; node->nr_values += values; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE); xas_update(xas, node); if (count < 0) xas_delete_node(xas); } /** * xas_store() - Store this entry in the XArray. * @xas: XArray operation state. * @entry: New entry. * * If @xas is operating on a multi-index entry, the entry returned by this * function is essentially meaningless (it may be an internal entry or it * may be %NULL, even if there are non-NULL entries at some of the indices * covered by the range). This is not a problem for any current users, * and can be changed if needed. * * Return: The old entry at this index. 
*/ void *xas_store(struct xa_state *xas, void *entry) { struct xa_node *node; void __rcu **slot = &xas->xa->xa_head; unsigned int offset, max; int count = 0; int values = 0; void *first, *next; bool value = xa_is_value(entry); if (entry) { bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry); first = xas_create(xas, allow_root); } else { first = xas_load(xas); } if (xas_invalid(xas)) return first; node = xas->xa_node; if (node && (xas->xa_shift < node->shift)) xas->xa_sibs = 0; if ((first == entry) && !xas->xa_sibs) return first; next = first; offset = xas->xa_offset; max = xas->xa_offset + xas->xa_sibs; if (node) { slot = &node->slots[offset]; if (xas->xa_sibs) xas_squash_marks(xas); } if (!entry) xas_init_marks(xas); for (;;) { /* * Must clear the marks before setting the entry to NULL, * otherwise xas_for_each_marked may find a NULL entry and * stop early. rcu_assign_pointer contains a release barrier * so the mark clearing will appear to happen before the * entry is set to NULL. */ rcu_assign_pointer(*slot, entry); if (xa_is_node(next) && (!node || node->shift)) xas_free_nodes(xas, xa_to_node(next)); if (!node) break; count += !next - !entry; values += !xa_is_value(first) - !value; if (entry) { if (offset == max) break; if (!xa_is_sibling(entry)) entry = xa_mk_sibling(xas->xa_offset); } else { if (offset == XA_CHUNK_MASK) break; } next = xa_entry_locked(xas->xa, node, ++offset); if (!xa_is_sibling(next)) { if (!entry && (offset > max)) break; first = next; } slot++; } update_node(xas, node, count, values); return first; } EXPORT_SYMBOL_GPL(xas_store); /** * xas_get_mark() - Returns the state of this mark. * @xas: XArray operation state. * @mark: Mark number. * * Return: true if the mark is set, false if the mark is clear or @xas * is in an error state. */ bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) { if (xas_invalid(xas)) return false; if (!xas->xa_node) return xa_marked(xas->xa, mark); return node_get_mark(xas->xa_node, xas->xa_offset, mark); } EXPORT_SYMBOL_GPL(xas_get_mark); /** * xas_set_mark() - Sets the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Sets the specified mark on this entry, and walks up the tree setting it * on all the ancestor entries. Does nothing if @xas has not been walked to * an entry, or is in an error state. */ void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (node_set_mark(node, offset, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (!xa_marked(xas->xa, mark)) xa_mark_set(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_set_mark); /** * xas_clear_mark() - Clears the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Clears the specified mark on this entry, and walks back to the head * attempting to clear it on all the ancestor entries. Does nothing if * @xas has not been walked to an entry, or is in an error state. 
*/ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (!node_clear_mark(node, offset, mark)) return; if (node_any_mark(node, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (xa_marked(xas->xa, mark)) xa_mark_clear(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_clear_mark); /** * xas_init_marks() - Initialise all marks for the entry * @xas: Array operations state. * * Initialise all marks for the entry specified by @xas. If we're tracking * free entries with a mark, we need to set it on all entries. All other * marks are cleared. * * This implementation is not as efficient as it could be; we may walk * up the tree multiple times. */ void xas_init_marks(const struct xa_state *xas) { xa_mark_t mark = 0; for (;;) { if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) xas_set_mark(xas, mark); else xas_clear_mark(xas, mark); if (mark == XA_MARK_MAX) break; mark_inc(mark); } } EXPORT_SYMBOL_GPL(xas_init_marks); #ifdef CONFIG_XARRAY_MULTI static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) { unsigned int marks = 0; xa_mark_t mark = XA_MARK_0; for (;;) { if (node_get_mark(node, offset, mark)) marks |= 1 << (__force unsigned int)mark; if (mark == XA_MARK_MAX) break; mark_inc(mark); } return marks; } static void node_set_marks(struct xa_node *node, unsigned int offset, struct xa_node *child, unsigned int marks) { xa_mark_t mark = XA_MARK_0; for (;;) { if (marks & (1 << (__force unsigned int)mark)) { node_set_mark(node, offset, mark); if (child) node_mark_all(child, mark); } if (mark == XA_MARK_MAX) break; mark_inc(mark); } } /** * xas_split_alloc() - Allocate memory for splitting an entry. * @xas: XArray operation state. * @entry: New entry which will be stored in the array. * @order: New entry order. * @gfp: Memory allocation flags. * * This function should be called before calling xas_split(). * If necessary, it will allocate new nodes (and fill them with @entry) * to prepare for the upcoming split of an entry of @order size into * entries of the order stored in the @xas. * * Context: May sleep if @gfp flags permit. */ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) goto nomem; if (xas->xa_shift + XA_CHUNK_SHIFT > order) return; do { unsigned int i; void *sibling = NULL; struct xa_node *node; node = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!node) goto nomem; node->array = xas->xa; for (i = 0; i < XA_CHUNK_SIZE; i++) { if ((i & mask) == 0) { RCU_INIT_POINTER(node->slots[i], entry); sibling = xa_mk_sibling(i); } else { RCU_INIT_POINTER(node->slots[i], sibling); } } RCU_INIT_POINTER(node->parent, xas->xa_alloc); xas->xa_alloc = node; } while (sibs-- > 0); return; nomem: xas_destroy(xas); xas_set_err(xas, -ENOMEM); } EXPORT_SYMBOL_GPL(xas_split_alloc); /** * xas_split() - Split a multi-index entry into smaller entries. * @xas: XArray operation state. * @entry: New entry to store in the array. * @order: New entry order. * * The value in the entry is copied to all the replacement entries. * * Context: Any context. The caller should hold the xa_lock. 
*/ void xas_split(struct xa_state *xas, void *entry, unsigned int order) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int offset, marks; struct xa_node *node; void *curr = xas_load(xas); int values = 0; node = xas->xa_node; if (xas_top(node)) return; marks = node_get_marks(node, xas->xa_offset); offset = xas->xa_offset + sibs; do { if (xas->xa_shift < node->shift) { struct xa_node *child = xas->xa_alloc; xas->xa_alloc = rcu_dereference_raw(child->parent); child->shift = node->shift - XA_CHUNK_SHIFT; child->offset = offset; child->count = XA_CHUNK_SIZE; child->nr_values = xa_is_value(entry) ? XA_CHUNK_SIZE : 0; RCU_INIT_POINTER(child->parent, node); node_set_marks(node, offset, child, marks); rcu_assign_pointer(node->slots[offset], xa_mk_node(child)); if (xa_is_value(curr)) values--; } else { unsigned int canon = offset - xas->xa_sibs; node_set_marks(node, canon, NULL, marks); rcu_assign_pointer(node->slots[canon], entry); while (offset > canon) rcu_assign_pointer(node->slots[offset--], xa_mk_sibling(canon)); values += (xa_is_value(entry) - xa_is_value(curr)) * (xas->xa_sibs + 1); } } while (offset-- > xas->xa_offset); node->nr_values += values; } EXPORT_SYMBOL_GPL(xas_split); #endif /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. * * Some users need to pause a walk and drop the lock they're holding in * order to yield to a higher priority thread or carry out an operation * on an entry. Those users should call this function before they drop * the lock. It resets the @xas to be suitable for the next iteration * of the loop after the user has reacquired the lock. If most entries * found during a walk require you to call xas_pause(), the xa_for_each() * iterator may be more appropriate. * * Note that xas_pause() only works for forward iteration. If a user needs * to pause a reverse iteration, we will need a xas_pause_rev(). */ void xas_pause(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (xas_invalid(xas)) return; xas->xa_node = XAS_RESTART; if (node) { unsigned long offset = xas->xa_offset; while (++offset < XA_CHUNK_SIZE) { if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) break; } xas->xa_index += (offset - xas->xa_offset) << node->shift; if (xas->xa_index == 0) xas->xa_node = XAS_BOUNDS; } else { xas->xa_index++; } } EXPORT_SYMBOL_GPL(xas_pause); /* * __xas_prev() - Find the previous entry in the XArray. * @xas: XArray operation state. * * Helper function for xas_prev() which handles all the complex cases * out of line. */ void *__xas_prev(struct xa_state *xas) { void *entry; if (!xas_frozen(xas->xa_node)) xas->xa_index--; if (!xas->xa_node) return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) xas->xa_offset--; while (xas->xa_offset == 255) { xas->xa_offset = xas->xa_node->offset - 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) return set_bounds(xas); } for (;;) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } } EXPORT_SYMBOL_GPL(__xas_prev); /* * __xas_next() - Find the next entry in the XArray. * @xas: XArray operation state. * * Helper function for xas_next() which handles all the complex cases * out of line. 
*/ void *__xas_next(struct xa_state *xas) { void *entry; if (!xas_frozen(xas->xa_node)) xas->xa_index++; if (!xas->xa_node) return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) xas->xa_offset++; while (xas->xa_offset == XA_CHUNK_SIZE) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) return set_bounds(xas); } for (;;) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } } EXPORT_SYMBOL_GPL(__xas_next); /** * xas_find() - Find the next present entry in the XArray. * @xas: XArray operation state. * @max: Highest index to return. * * If the @xas has not yet been walked to an entry, return the entry * which has an index >= xas.xa_index. If it has been walked, the entry * currently being pointed at has been processed, and so we move to the * next entry. * * If no entry is found and the array is smaller than @max, the iterator * is set to the smallest index not yet in the array. This allows @xas * to be immediately passed to xas_store(). * * Return: The entry, if found, otherwise %NULL. */ void *xas_find(struct xa_state *xas, unsigned long max) { void *entry; if (xas_error(xas) || xas->xa_node == XAS_BOUNDS) return NULL; if (xas->xa_index > max) return set_bounds(xas); if (!xas->xa_node) { xas->xa_index = 1; return set_bounds(xas); } else if (xas->xa_node == XAS_RESTART) { entry = xas_load(xas); if (entry || xas_not_node(xas->xa_node)) return entry; } else if (!xas->xa_node->shift && xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) { xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; } xas_advance(xas); while (xas->xa_node && (xas->xa_index <= max)) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); continue; } entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (xa_is_node(entry)) { xas->xa_node = xa_to_node(entry); xas->xa_offset = 0; continue; } if (entry && !xa_is_sibling(entry)) return entry; xas_advance(xas); } if (!xas->xa_node) xas->xa_node = XAS_BOUNDS; return NULL; } EXPORT_SYMBOL_GPL(xas_find); /** * xas_find_marked() - Find the next marked entry in the XArray. * @xas: XArray operation state. * @max: Highest index to return. * @mark: Mark number to search for. * * If the @xas has not yet been walked to an entry, return the marked entry * which has an index >= xas.xa_index. If it has been walked, the entry * currently being pointed at has been processed, and so we return the * first marked entry with an index > xas.xa_index. * * If no marked entry is found and the array is smaller than @max, @xas is * set to the bounds state and xas->xa_index is set to the smallest index * not yet in the array. This allows @xas to be immediately passed to * xas_store(). * * If no entry is found before @max is reached, @xas is set to the restart * state. * * Return: The entry, if found, otherwise %NULL. 
*/ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { bool advance = true; unsigned int offset; void *entry; if (xas_error(xas)) return NULL; if (xas->xa_index > max) goto max; if (!xas->xa_node) { xas->xa_index = 1; goto out; } else if (xas_top(xas->xa_node)) { advance = false; entry = xa_head(xas->xa); xas->xa_node = NULL; if (xas->xa_index > max_index(entry)) goto out; if (!xa_is_node(entry)) { if (xa_marked(xas->xa, mark)) return entry; xas->xa_index = 1; goto out; } xas->xa_node = xa_to_node(entry); xas->xa_offset = xas->xa_index >> xas->xa_node->shift; } while (xas->xa_index <= max) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) break; advance = false; continue; } if (!advance) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (xa_is_sibling(entry)) { xas->xa_offset = xa_to_sibling(entry); xas_move_index(xas, xas->xa_offset); } } offset = xas_find_chunk(xas, advance, mark); if (offset > xas->xa_offset) { advance = false; xas_move_index(xas, offset); /* Mind the wrap */ if ((xas->xa_index - 1) >= max) goto max; xas->xa_offset = offset; if (offset == XA_CHUNK_SIZE) continue; } entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK)) continue; if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } out: if (xas->xa_index > max) goto max; return set_bounds(xas); max: xas->xa_node = XAS_RESTART; return NULL; } EXPORT_SYMBOL_GPL(xas_find_marked); /** * xas_find_conflict() - Find the next present entry in a range. * @xas: XArray operation state. * * The @xas describes both a range and a position within that range. * * Context: Any context. Expects xa_lock to be held. * Return: The next entry in the range covered by @xas or %NULL. */ void *xas_find_conflict(struct xa_state *xas) { void *curr; if (xas_error(xas)) return NULL; if (!xas->xa_node) return NULL; if (xas_top(xas->xa_node)) { curr = xas_start(xas); if (!curr) return NULL; while (xa_is_node(curr)) { struct xa_node *node = xa_to_node(curr); curr = xas_descend(xas, node); } if (curr) return curr; } if (xas->xa_node->shift > xas->xa_shift) return NULL; for (;;) { if (xas->xa_node->shift == xas->xa_shift) { if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs) break; } else if (xas->xa_offset == XA_CHUNK_MASK) { xas->xa_offset = xas->xa_node->offset; xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node); if (!xas->xa_node) break; continue; } curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset); if (xa_is_sibling(curr)) continue; while (xa_is_node(curr)) { xas->xa_node = xa_to_node(curr); xas->xa_offset = 0; curr = xa_entry_locked(xas->xa, xas->xa_node, 0); } if (curr) return curr; } xas->xa_offset -= xas->xa_sibs; return NULL; } EXPORT_SYMBOL_GPL(xas_find_conflict); /** * xa_load() - Load an entry from an XArray. * @xa: XArray. * @index: index into array. * * Context: Any context. Takes and releases the RCU lock. * Return: The entry at @index in @xa. 
*/ void *xa_load(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); void *entry; rcu_read_lock(); do { entry = xas_load(&xas); if (xa_is_zero(entry)) entry = NULL; } while (xas_retry(&xas, entry)); rcu_read_unlock(); return entry; } EXPORT_SYMBOL(xa_load); static void *xas_result(struct xa_state *xas, void *curr) { if (xa_is_zero(curr)) return NULL; if (xas_error(xas)) curr = xas->xa_node; return curr; } /** * __xa_erase() - Erase this entry from the XArray while locked. * @xa: XArray. * @index: Index into array. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Expects xa_lock to be held on entry. * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); return xas_result(&xas, xas_store(&xas, NULL)); } EXPORT_SYMBOL(__xa_erase); /** * xa_erase() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. */ void *xa_erase(struct xarray *xa, unsigned long index) { void *entry; xa_lock(xa); entry = __xa_erase(xa, index); xa_unlock(xa); return entry; } EXPORT_SYMBOL(xa_erase); /** * __xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; do { curr = xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_store); /** * xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * After this function returns, loads from this index will return @entry. * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. * May sleep if the @gfp flags permit. * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation * failed. */ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; xa_lock(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock(xa); return curr; } EXPORT_SYMBOL(xa_store); /** * __xa_cmpxchg() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New entry. 
* @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); do { curr = xas_load(&xas); if (curr == old) { xas_store(&xas, entry); if (xa_track_free(xa) && entry && !curr) xas_clear_mark(&xas, XA_FREE_MARK); } } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_cmpxchg); /** * __xa_insert() - Store this entry in the XArray if no entry is present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; do { curr = xas_load(&xas); if (!curr) { xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } else { xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } EXPORT_SYMBOL(__xa_insert); #ifdef CONFIG_XARRAY_MULTI static void xas_set_range(struct xa_state *xas, unsigned long first, unsigned long last) { unsigned int shift = 0; unsigned long sibs = last - first; unsigned int offset = XA_CHUNK_MASK; xas_set(xas, first); while ((first & XA_CHUNK_MASK) == 0) { if (sibs < XA_CHUNK_MASK) break; if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK)) break; shift += XA_CHUNK_SHIFT; if (offset == XA_CHUNK_MASK) offset = sibs & XA_CHUNK_MASK; sibs >>= XA_CHUNK_SHIFT; first >>= XA_CHUNK_SHIFT; } offset = first & XA_CHUNK_MASK; if (offset + sibs > XA_CHUNK_MASK) sibs = XA_CHUNK_MASK - offset; if ((((first + sibs + 1) << shift) - 1) > last) sibs -= 1; xas->xa_shift = shift; xas->xa_sibs = sibs; } /** * xa_store_range() - Store this entry at a range of indices in the XArray. * @xa: XArray. * @first: First index to affect. * @last: Last index to affect. * @entry: New entry. * @gfp: Memory allocation flags. * * After this function returns, loads from any index between @first and @last, * inclusive will return @entry. * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Process context. Takes and releases the xa_lock. May sleep * if the @gfp flags permit. * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in * an XArray, or xa_err(-ENOMEM) if memory allocation failed. 
*/ void *xa_store_range(struct xarray *xa, unsigned long first, unsigned long last, void *entry, gfp_t gfp) { XA_STATE(xas, xa, 0); if (WARN_ON_ONCE(xa_is_internal(entry))) return XA_ERROR(-EINVAL); if (last < first) return XA_ERROR(-EINVAL); do { xas_lock(&xas); if (entry) { unsigned int order = BITS_PER_LONG; if (last + 1) order = __ffs(last + 1); xas_set_order(&xas, last, order); xas_create(&xas, true); if (xas_error(&xas)) goto unlock; } do { xas_set_range(&xas, first, last); xas_store(&xas, entry); if (xas_error(&xas)) goto unlock; first += xas_size(&xas); } while (first <= last); unlock: xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); return xas_result(&xas, NULL); } EXPORT_SYMBOL(xa_store_range); /** * xa_get_order() - Get the order of an entry. * @xa: XArray. * @index: Index of the entry. * * Return: A number between 0 and 63 indicating the order of the entry. */ int xa_get_order(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); void *entry; int order = 0; rcu_read_lock(); entry = xas_load(&xas); if (!entry) goto unlock; if (!xas.xa_node) goto unlock; for (;;) { unsigned int slot = xas.xa_offset + (1 << order); if (slot >= XA_CHUNK_SIZE) break; if (!xa_is_sibling(xas.xa_node->slots[slot])) break; order++; } order += xas.xa_node->shift; unlock: rcu_read_unlock(); return order; } EXPORT_SYMBOL(xa_get_order); #endif /* CONFIG_XARRAY_MULTI */ /** * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ int __xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (WARN_ON_ONCE(!xa_track_free(xa))) return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; do { xas.xa_index = limit.min; xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); else *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); /** * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. 
*/ int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { u32 min = limit.min; int ret; limit.min = max(min, *next); ret = __xa_alloc(xa, id, entry, limit, gfp); if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; ret = 1; } if (ret < 0 && limit.min > min) { limit.min = min; ret = __xa_alloc(xa, id, entry, limit, gfp); if (ret == 0) ret = 1; } if (ret >= 0) { *next = *id + 1; if (*next == 0) xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; } return ret; } EXPORT_SYMBOL(__xa_alloc_cyclic); /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_set_mark(&xas, mark); } EXPORT_SYMBOL(__xa_set_mark); /** * __xa_clear_mark() - Clear this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_clear_mark(&xas, mark); } EXPORT_SYMBOL(__xa_clear_mark); /** * xa_get_mark() - Inquire whether this mark is set on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * This function uses the RCU read lock, so the result may be out of date * by the time it returns. If you need the result to be stable, use a lock. * * Context: Any context. Takes and releases the RCU lock. * Return: True if the entry at @index has this mark set, false if it doesn't. */ bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry; rcu_read_lock(); entry = xas_start(&xas); while (xas_get_mark(&xas, mark)) { if (!xa_is_node(entry)) goto found; entry = xas_descend(&xas, xa_to_node(entry)); } rcu_read_unlock(); return false; found: rcu_read_unlock(); return true; } EXPORT_SYMBOL(xa_get_mark); /** * xa_set_mark() - Set this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Process context. Takes and releases the xa_lock. */ void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_set_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_set_mark); /** * xa_clear_mark() - Clear this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Clearing a mark always succeeds. * * Context: Process context. Takes and releases the xa_lock. */ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_clear_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_clear_mark); /** * xa_find() - Search the XArray for an entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter, and has the lowest * index that is at least @indexp and no more than @max. * If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may not find * entries which are being simultaneously added. 
It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The entry, if found, otherwise %NULL. */ void *xa_find(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp); void *entry; rcu_read_lock(); do { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); } while (xas_retry(&xas, entry)); rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find); static bool xas_sibling(struct xa_state *xas) { struct xa_node *node = xas->xa_node; unsigned long mask; if (!IS_ENABLED(CONFIG_XARRAY_MULTI) || !node) return false; mask = (XA_CHUNK_SIZE << node->shift) - 1; return (xas->xa_index & mask) > ((unsigned long)xas->xa_offset << node->shift); } /** * xa_find_after() - Search the XArray for a present entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter and has the lowest * index that is above @indexp and no more than @max. * If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may miss entries * which are being simultaneously added. It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The pointer, if found, otherwise %NULL. */ void *xa_find_after(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp + 1); void *entry; if (xas.xa_index == 0) return NULL; rcu_read_lock(); for (;;) { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); if (xas_invalid(&xas)) break; if (xas_sibling(&xas)) continue; if (!xas_retry(&xas, entry)) break; } rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find_after); static unsigned int xas_extract_present(struct xa_state *xas, void **dst, unsigned long max, unsigned int n) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each(xas, entry, max) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, unsigned long max, unsigned int n, xa_mark_t mark) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each_marked(xas, entry, max, mark) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } /** * xa_extract() - Copy selected entries from the XArray into a normal array. * @xa: The source XArray to copy from. * @dst: The buffer to copy entries into. * @start: The first index in the XArray eligible to be selected. * @max: The last index in the XArray eligible to be selected. * @n: The maximum number of entries to copy. * @filter: Selection criterion. * * Copies up to @n entries that match @filter from the XArray. The * copied entries will have indices between @start and @max, inclusive. * * The @filter may be an XArray mark value, in which case entries which are * marked with that mark will be copied. It may also be %XA_PRESENT, in * which case all entries which are not %NULL will be copied. 
* * The entries returned may not represent a snapshot of the XArray at a * moment in time. For example, if another thread stores to index 5, then * index 10, calling xa_extract() may return the old contents of index 5 * and the new contents of index 10. Indices not modified while this * function is running will not be skipped. * * If you need stronger guarantees, holding the xa_lock across calls to this * function will prevent concurrent modification. * * Context: Any context. Takes and releases the RCU lock. * Return: The number of entries copied. */ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t filter) { XA_STATE(xas, xa, start); if (!n) return 0; if ((__force unsigned int)filter < XA_MAX_MARKS) return xas_extract_marked(&xas, dst, max, n, filter); return xas_extract_present(&xas, dst, max, n); } EXPORT_SYMBOL(xa_extract); /** * xa_delete_node() - Private interface for workingset code. * @node: Node to be removed from the tree. * @update: Function to call to update ancestor nodes. * * Context: xa_lock must be held on entry and will not be released. */ void xa_delete_node(struct xa_node *node, xa_update_node_t update) { struct xa_state xas = { .xa = node->array, .xa_index = (unsigned long)node->offset << (node->shift + XA_CHUNK_SHIFT), .xa_shift = node->shift + XA_CHUNK_SHIFT, .xa_offset = node->offset, .xa_node = xa_parent_locked(node->array, node), .xa_update = update, }; xas_store(&xas, NULL); } EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ /** * xa_destroy() - Free all internal data structures. * @xa: XArray. * * After calling this function, the XArray is empty and has freed all memory * allocated for its internal data structures. You are responsible for * freeing the objects referenced by the XArray. * * Context: Any context. Takes and releases the xa_lock, interrupt-safe. */ void xa_destroy(struct xarray *xa) { XA_STATE(xas, xa, 0); unsigned long flags; void *entry; xas.xa_node = NULL; xas_lock_irqsave(&xas, flags); entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); if (xa_zero_busy(xa)) xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(xa_destroy); #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { unsigned i, j; if (!node) return; if ((unsigned long)node & 3) { pr_cont("node %px\n", node); return; } pr_cont("node %px %s %d parent %px shift %d count %d values %d " "array %px list %px %px marks", node, node->parent ? 
"offset" : "max", node->offset, node->parent, node->shift, node->count, node->nr_values, node->array, node->private_list.prev, node->private_list.next); for (i = 0; i < XA_MAX_MARKS; i++) for (j = 0; j < XA_MARK_LONGS; j++) pr_cont(" %lx", node->marks[i][j]); pr_cont("\n"); } void xa_dump_index(unsigned long index, unsigned int shift) { if (!shift) pr_info("%lu: ", index); else if (shift >= BITS_PER_LONG) pr_info("0-%lu: ", ~0UL); else pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1)); } void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) { if (!entry) return; xa_dump_index(index, shift); if (xa_is_node(entry)) { if (shift == 0) { pr_cont("%px\n", entry); } else { unsigned long i; struct xa_node *node = xa_to_node(entry); xa_dump_node(node); for (i = 0; i < XA_CHUNK_SIZE; i++) xa_dump_entry(node->slots[i], index + (i << node->shift), node->shift); } } else if (xa_is_value(entry)) pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry), xa_to_value(entry), entry); else if (!xa_is_internal(entry)) pr_cont("%px\n", entry); else if (xa_is_retry(entry)) pr_cont("retry (%ld)\n", xa_to_internal(entry)); else if (xa_is_sibling(entry)) pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else pr_cont("UNKNOWN ENTRY (%px)\n", entry); } void xa_dump(const struct xarray *xa) { void *entry = xa->xa_head; unsigned int shift = 0; pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, xa->xa_flags, xa_marked(xa, XA_MARK_0), xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2)); if (xa_is_node(entry)) shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; xa_dump_entry(entry, 0, shift); } #endif
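/*
 * Illustrative sketch (not part of xarray.c): the advanced API driven
 * through an xa_state, using the retry idiom described in the xas_nomem()
 * kernel-doc above: store under the lock, drop the lock to allocate if the
 * operation failed with -ENOMEM, then retry. example_xas_store() is a
 * made-up name for this example.
 */
#include <linux/xarray.h>
#include <linux/gfp.h>

static int example_xas_store(struct xarray *xa, unsigned long index, void *entry)
{
	XA_STATE(xas, xa, index);

	do {
		xas_lock(&xas);
		xas_store(&xas, entry);
		xas_set_mark(&xas, XA_MARK_0);	/* does nothing if the store failed */
		xas_unlock(&xas);
		/*
		 * xas_nomem() returns true only if the store failed with
		 * -ENOMEM and a node could be preallocated, in which case
		 * the loop retries with that node available to xas_alloc().
		 */
	} while (xas_nomem(&xas, GFP_KERNEL));

	return xas_error(&xas);
}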
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 /* SPDX-License-Identifier: GPL-2.0 */ /* * Linux Socket Filter Data Structures */ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ #include <stdarg.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/compat.h> #include <linux/skbuff.h> #include <linux/linkage.h> #include <linux/printk.h> #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/capability.h> #include <linux/set_memory.h> #include <linux/kallsyms.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/sockptr.h> #include <crypto/sha.h> #include <net/sch_generic.h> #include <asm/byteorder.h> #include <uapi/linux/filter.h> #include <uapi/linux/bpf.h> struct sk_buff; struct sock; struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; struct ctl_table; struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. 
Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. */ #define BPF_REG_ARG1 BPF_REG_1 #define BPF_REG_ARG2 BPF_REG_2 #define BPF_REG_ARG3 BPF_REG_3 #define BPF_REG_ARG4 BPF_REG_4 #define BPF_REG_ARG5 BPF_REG_5 #define BPF_REG_CTX BPF_REG_6 #define BPF_REG_FP BPF_REG_10 /* Additional register mappings for converted user programs. */ #define BPF_REG_A BPF_REG_0 #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_2 /* scratch reg */ #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ /* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 /* unused opcode to mark special load instruction. Same as BPF_ABS */ #define BPF_PROBE_MEM 0x20 /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 /* unused opcode to mark speculation barrier for mitigating * Speculative Store Bypass */ #define BPF_NOSPEC 0xc0 /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. */ #define BPF_SYM_ELF_TYPE 't' /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_ALU32_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ #define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ #define BPF_ENDIAN(TYPE, DST, LEN) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = LEN }) /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Special form of mov32, used for doing explicit zero extension on dst. 
*/ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = DST, \ .off = 0, \ .imm = 1 }) static inline bool insn_is_zext(const struct bpf_insn *insn) { return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; } /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_DW | BPF_IMM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = (__u32) (IMM) }), \ ((struct bpf_insn) { \ .code = 0, /* zero is reserved opcode */ \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ #define BPF_LD_ABS(SIZE, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ #define BPF_LD_IND(SIZE, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ .dst_reg = 0, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ #define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ ((struct bpf_insn) { \ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ #define BPF_JMP_REG(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ #define BPF_JMP32_REG(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. 
*/ #define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_JA, \ .dst_reg = 0, \ .src_reg = 0, \ .off = OFF, \ .imm = 0 }) /* Relative call */ #define BPF_CALL_REL(TGT) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = BPF_PSEUDO_CALL, \ .off = 0, \ .imm = TGT }) /* Function call */ #define BPF_CAST_CALL(x) \ ((u64 (*)(u64, u64, u64, u64, u64))(x)) #define BPF_EMIT_CALL(FUNC) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((FUNC) - __bpf_call_base) }) /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ((struct bpf_insn) { \ .code = CODE, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = IMM }) /* Program exit */ #define BPF_EXIT_INSN() \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) /* Speculation barrier */ #define BPF_ST_NOSPEC() \ ((struct bpf_insn) { \ .code = BPF_ST | BPF_NOSPEC, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ ((struct sock_filter) BPF_STMT(CODE, K)) #define __BPF_JUMP(CODE, K, JT, JF) \ ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ \ if (bytes == sizeof(u8)) \ bpf_size = BPF_B; \ else if (bytes == sizeof(u16)) \ bpf_size = BPF_H; \ else if (bytes == sizeof(u32)) \ bpf_size = BPF_W; \ else if (bytes == sizeof(u64)) \ bpf_size = BPF_DW; \ \ bpf_size; \ }) #define bpf_size_to_bytes(bpf_size) \ ({ \ int bytes = -EINVAL; \ \ if (bpf_size == BPF_B) \ bytes = sizeof(u8); \ else if (bpf_size == BPF_H) \ bytes = sizeof(u16); \ else if (bpf_size == BPF_W) \ bytes = sizeof(u32); \ else if (bpf_size == BPF_DW) \ bytes = sizeof(u64); \ \ bytes; \ }) #define BPF_SIZEOF(type) \ ({ \ const int __size = bytes_to_bpf_size(sizeof(type)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) #define BPF_FIELD_SIZEOF(type, field) \ ({ \ const int __size = bytes_to_bpf_size(sizeof_field(type, field)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) #define BPF_LDST_BYTES(insn) \ ({ \ const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \ WARN_ON(__size < 0); \ __size; \ }) #define __BPF_MAP_0(m, v, ...) v #define __BPF_MAP_1(m, v, t, a, ...) m(t, a) #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) #define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) #define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) #define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) #define __BPF_REG_0(...) __BPF_PAD(5) #define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) #define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) #define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) #define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) #define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) #define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) #define __BPF_REG(n, ...) 
__BPF_REG_##n(__VA_ARGS__) #define __BPF_CAST(t, a) \ (__force t) \ (__force \ typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ (unsigned long)0, (t)0))) a #define __BPF_V void #define __BPF_N #define __BPF_DECL_ARGS(t, a) t a #define __BPF_DECL_REGS(t, a) u64 a #define __BPF_PAD(n) \ __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) #define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) #define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) #define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) #define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) #define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) #define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 #if BITS_PER_LONG == 64 # define bpf_ctx_range_ptr(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #else # define bpf_ctx_range_ptr(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 #endif /* BITS_PER_LONG == 64 */ #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ BUILD_BUG_ON(sizeof_field(TYPE, MEMBER) != (SIZE)); \ *(PTR_SIZE) = (SIZE); \ offsetof(TYPE, MEMBER); \ }) /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { u16 len; compat_uptr_t filter; /* struct sock_filter * */ }; struct sock_fprog_kern { u16 len; struct sock_filter *filter; }; /* Some arches need doubleword alignment for their instructions and/or data */ #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { u32 pages; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ jit_requested:1,/* archs need to JIT the prog */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? 
*/ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); /* Instructions for interpreter */ struct sock_filter insns[0]; struct bpf_insn insnsi[]; }; struct sk_filter { refcount_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ struct bpf_prog_stats *__stats; \ u64 __start = sched_clock(); \ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ __stats = this_cpu_ptr(prog->aux->stats); \ u64_stats_update_begin(&__stats->syncp); \ __stats->cnt++; \ __stats->nsecs += sched_clock() - __start; \ u64_stats_update_end(&__stats->syncp); \ } else { \ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that * the execution of the BPF program runs on one CPU. * * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. * * For non RT enabled kernels migrate_disable/enable() maps to * preempt_disable/enable(), i.e. it disables also preemption. */ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) { u32 ret; migrate_disable(); ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; void *data_meta; void *data_end; }; struct bpf_nh_params { u32 nh_family; union { u32 ipv4_nh; struct in6_addr ipv6_nh; }; }; struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; struct bpf_map *map; u32 kern_flags; struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) * ensure that cb[] area can be written to when BPF program is * invoked (otherwise cb[] save/restore is necessary). */ static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); cb->data_meta = skb->data - skb_metadata_len(skb); cb->data_end = skb->data + skb_headlen(skb); } /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. 
*/ static inline void bpf_compute_and_save_data_end( struct sk_buff *skb, void **saved_data_end) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; *saved_data_end = cb->data_end; cb->data_end = skb->data + skb_headlen(skb); } /* Restore data saved by bpf_compute_data_pointers(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; cb->data_end = saved_data_end; } static inline u8 *bpf_skb_cb(struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta * data between tail calls. Since this also needs to work with * tc, that scratch memory is mapped to qdisc_skb_cb's data area. * * In some socket filter cases, the cb unfortunately needs to be * saved/restored so that protocol specific skb->cb[] data won't * be lost. In any case, due to unpriviledged eBPF programs * attached to sockets, we need to clear the bpf_skb_cb() area * to not leak previous contents to user space. */ BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != BPF_SKB_CB_LEN); BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != sizeof_field(struct qdisc_skb_cb, data)); return qdisc_skb_cb(skb)->data; } /* Must be invoked with migration disabled */ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; u32 res; if (unlikely(prog->cb_access)) { memcpy(cb_saved, cb_data, sizeof(cb_saved)); memset(cb_data, 0, sizeof(cb_saved)); } res = BPF_PROG_RUN(prog, skb); if (unlikely(prog->cb_access)) memcpy(cb_data, cb_saved, sizeof(cb_saved)); return res; } static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u32 res; migrate_disable(); res = __bpf_prog_run_save_cb(prog, skb); migrate_enable(); return res; } static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { /* Caller needs to hold rcu_read_lock() (!), otherwise program * can be released while still running, or map elements could be * freed early while still having concurrent users. XDP fastpath * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) { return prog->len * sizeof(struct bpf_insn); } static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) { return round_up(bpf_prog_insn_size(prog) + sizeof(__be64) + 1, SHA1_BLOCK_SIZE); } static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), offsetof(struct bpf_prog, insns[proglen])); } static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) { /* When classic BPF programs have been loaded and the arch * does not have a classic BPF JIT (anymore), they have been * converted via bpf_migrate_filter() to eBPF and thus always * have an unspec program type. 
*/ return prog->type == BPF_PROG_TYPE_UNSPEC; } static inline u32 bpf_ctx_off_adjust_machine(u32 size) { const u32 size_machine = sizeof(unsigned long); if (size > size_machine && size % size_machine == 0) size = size_machine; return size; } static inline bool bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) { return size <= size_default && (size & (size - 1)) == 0; } static inline u8 bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) { u8 access_off = off & (size_default - 1); #ifdef __LITTLE_ENDIAN return access_off; #else return size_default - (access_off + size); #endif } #define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ off + sizeof(__u64) <= offsetofend(type, field) && \ off % sizeof(__u64) == 0) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { #ifndef CONFIG_BPF_JIT_ALWAYS_ON if (!fp->jited) { set_vm_flush_reset_perms(fp); set_memory_ro((unsigned long)fp, fp->pages); } #endif } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); set_memory_ro((unsigned long)hdr, hdr->pages); set_memory_x((unsigned long)hdr, hdr->pages); } static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) { unsigned long real_start = (unsigned long)fp->bpf_func; unsigned long addr = real_start & PAGE_MASK; return (void *)addr; } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { return sk_filter_trim_cap(sk, skb, 1); } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); bool bpf_opcode_in_insntable(u8 code); void bpf_prog_free_linfo(struct bpf_prog *prog); void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const u32 *insn_to_jit_off); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); void bpf_prog_free_jited_linfo(struct bpf_prog *prog); void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { __bpf_prog_free(fp); } typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, unsigned int flen); int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #define __bpf_call_base_args \ ((u64 (*)(u64, u64, u64, u64, u64, const struct 
bpf_insn *)) \ (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); static inline bool xdp_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_set_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_clear_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; } static inline int xdp_ok_fwd_dev(const struct net_device *fwd, unsigned int pktlen) { unsigned int len; if (unlikely(!(fwd->flags & IFF_UP))) return -ENETDOWN; len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; if (pktlen > len) return -EMSGSIZE; return 0; } /* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is * because we only track one map and force a flush when the map changes. * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); void xdp_do_flush(void); /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as * it is no longer only flushing maps. Keep this define for compatibility * until all drivers are updated - do not use xdp_do_flush_map() in new code! 
*/ #define xdp_do_flush_map xdp_do_flush void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, u32 hash); #else static inline struct sock * bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, u32 hash) { return NULL; } #endif #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke); int bpf_jit_get_func_addr(const struct bpf_prog *prog, const struct bpf_insn *insn, bool extra_pass, u64 *func_addr, bool *func_addr_fixed); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen, proglen, pass, image, current->comm, task_pid_nr(current)); if (image) print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } static inline bool bpf_jit_is_ebpf(void) { # ifdef CONFIG_HAVE_EBPF_JIT return true; # else return false; # endif } static inline bool ebpf_jit_enabled(void) { return bpf_jit_enable && bpf_jit_is_ebpf(); } static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return fp->jited && bpf_jit_is_ebpf(); } static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { /* These are the prerequisites, should someone ever have the * idea to call blinding outside of them, we make sure to * bail out. */ if (!bpf_jit_is_ebpf()) return false; if (!prog->jit_requested) return false; if (!bpf_jit_harden) return false; if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) return false; return true; } static inline bool bpf_jit_kallsyms_enabled(void) { /* There are a couple of corner cases where kallsyms should * not be enabled f.e. on hardening. 
*/ if (bpf_jit_harden) return false; if (!bpf_jit_kallsyms) return false; if (bpf_jit_kallsyms == 1) return true; return false; } const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { const char *ret = __bpf_address_lookup(addr, size, off, sym); if (ret && modname) *modname = NULL; return ret; } void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); #else /* CONFIG_BPF_JIT */ static inline bool ebpf_jit_enabled(void) { return false; } static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { return false; } static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; } static inline int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke) { return -ENOTSUPP; } static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } static inline bool bpf_jit_kallsyms_enabled(void) { return false; } static inline const char * __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { return NULL; } static inline bool is_bpf_text_address(unsigned long addr) { return false; } static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { return -ERANGE; } static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { return NULL; } static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) { } static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); #define BPF_ANC BIT(15) static inline bool bpf_needs_clear_a(const struct sock_filter *first) { switch (first->code) { case BPF_RET | BPF_K: case BPF_LD | BPF_W | BPF_LEN: return false; case BPF_LD | BPF_W | BPF_ABS: case BPF_LD | BPF_H | BPF_ABS: case BPF_LD | BPF_B | BPF_ABS: if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X) return true; return false; default: return true; } } static inline u16 bpf_anc_helper(const struct sock_filter *ftest) { BUG_ON(ftest->code & BPF_ANC); switch (ftest->code) { case BPF_LD | BPF_W | BPF_ABS: case BPF_LD | BPF_H | BPF_ABS: case BPF_LD | BPF_B | BPF_ABS: #define BPF_ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ return BPF_ANC | SKF_AD_##CODE switch (ftest->k) { BPF_ANCILLARY(PROTOCOL); BPF_ANCILLARY(PKTTYPE); BPF_ANCILLARY(IFINDEX); BPF_ANCILLARY(NLATTR); BPF_ANCILLARY(NLATTR_NEST); BPF_ANCILLARY(MARK); BPF_ANCILLARY(QUEUE); BPF_ANCILLARY(HATYPE); BPF_ANCILLARY(RXHASH); BPF_ANCILLARY(CPU); BPF_ANCILLARY(ALU_XOR_X); BPF_ANCILLARY(VLAN_TAG); BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); BPF_ANCILLARY(VLAN_TPID); } fallthrough; default: return ftest->code; } } void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size); static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, unsigned int size, void *buffer) { if (k >= 0) return skb_header_pointer(skb, k, size, buffer); return bpf_internal_load_pointer_neg_helper(skb, k, size); } static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; } struct bpf_sock_addr_kern { struct sock *sk; struct sockaddr 
*uaddr; /* Temporary "register" to make indirect stores to nested structures * defined above. We need three registers to make such a store, but * only two (src and dst) are available at convert_ctx_access time */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ }; struct bpf_sock_ops_kern { struct sock *sk; union { u32 args[4]; u32 reply; u32 replylong[4]; }; struct sk_buff *syn_skb; struct sk_buff *skb; void *skb_data_end; u8 op; u8 is_fullsock; u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that * should be initialized to 0 should * be inserted before temp. * temp is scratch storage used by * sock_ops_convert_ctx_access * as temporary storage of a register. */ }; struct bpf_sysctl_kern { struct ctl_table_header *head; struct ctl_table *table; void *cur_val; size_t cur_len; void *new_val; size_t new_len; int new_updated; int write; loff_t *ppos; /* Temporary "register" for indirect stores to ppos. */ u64 tmp_reg; }; struct bpf_sockopt_kern { struct sock *sk; u8 *optval; u8 *optval_end; s32 level; s32 optname; s32 optlen; s32 retval; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); struct bpf_sk_lookup_kern { u16 family; u16 protocol; __be16 sport; u16 dport; struct { __be32 saddr; __be32 daddr; } v4; struct { const struct in6_addr *saddr; const struct in6_addr *daddr; } v6; struct sock *selected_sk; bool no_reuseport; }; extern struct static_key_false bpf_sk_lookup_enabled; /* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. * * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and * SK_DROP. Their meaning is as follows: * * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup * SK_DROP : terminate lookup with -ECONNREFUSED * * This macro aggregates return values and selected sockets from * multiple BPF programs according to following rules in order: * * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk, * macro result is SK_PASS and last ctx.selected_sk is used. * 2. If any program returned SK_DROP return value, * macro result is SK_DROP. * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL. * * Caller must ensure that the prog array is non-NULL, and that the * array as well as the programs it contains remain valid. */ #define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \ ({ \ struct bpf_sk_lookup_kern *_ctx = &(ctx); \ struct bpf_prog_array_item *_item; \ struct sock *_selected_sk = NULL; \ bool _no_reuseport = false; \ struct bpf_prog *_prog; \ bool _all_pass = true; \ u32 _ret; \ \ migrate_disable(); \ _item = &(array)->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ /* restore most recent selection */ \ _ctx->selected_sk = _selected_sk; \ _ctx->no_reuseport = _no_reuseport; \ \ _ret = func(_prog, _ctx); \ if (_ret == SK_PASS && _ctx->selected_sk) { \ /* remember last non-NULL socket */ \ _selected_sk = _ctx->selected_sk; \ _no_reuseport = _ctx->no_reuseport; \ } else if (_ret == SK_DROP && _all_pass) { \ _all_pass = false; \ } \ _item++; \ } \ _ctx->selected_sk = _selected_sk; \ _ctx->no_reuseport = _no_reuseport; \ migrate_enable(); \ _all_pass || _selected_sk ? 
SK_PASS : SK_DROP; \ }) static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; bool no_reuseport = false; rcu_read_lock(); run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); if (run_array) { struct bpf_sk_lookup_kern ctx = { .family = AF_INET, .protocol = protocol, .v4.saddr = saddr, .v4.daddr = daddr, .sport = sport, .dport = dport, }; u32 act; act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); if (act == SK_PASS) { selected_sk = ctx.selected_sk; no_reuseport = ctx.no_reuseport; } else { selected_sk = ERR_PTR(-ECONNREFUSED); } } rcu_read_unlock(); *psk = selected_sk; return no_reuseport; } #if IS_ENABLED(CONFIG_IPV6) static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 dport, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; bool no_reuseport = false; rcu_read_lock(); run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); if (run_array) { struct bpf_sk_lookup_kern ctx = { .family = AF_INET6, .protocol = protocol, .v6.saddr = saddr, .v6.daddr = daddr, .sport = sport, .dport = dport, }; u32 act; act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); if (act == SK_PASS) { selected_sk = ctx.selected_sk; no_reuseport = ctx.no_reuseport; } else { selected_sk = ERR_PTR(-ECONNREFUSED); } } rcu_read_unlock(); *psk = selected_sk; return no_reuseport; } #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* __LINUX_FILTER_H__ */
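/*
 * Illustrative sketch (not part of filter.h): the insn-builder macros defined
 * above are typically used to hand-assemble small eBPF programs inside the
 * kernel, for example in test code or during classic-to-eBPF conversion. The
 * array below is a hypothetical example only: it computes 2 + 3 in R0, checks
 * the result, and returns it via the BPF_EXIT instruction.
 */
static const struct bpf_insn bpf_example_prog[] = {
	BPF_MOV64_IMM(BPF_REG_0, 2),		/* r0 = 2 */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),	/* r0 += 3 */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 5, 1),	/* if r0 == 5, skip next insn */
	BPF_MOV64_IMM(BPF_REG_0, 0),		/* r0 = 0 (not reached here) */
	BPF_EXIT_INSN(),			/* return r0 */
};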
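/*
 * Userspace-side sketch of the classic BPF path that sk_attach_filter() and
 * bpf_prog_create_from_user() service kernel-side. Everything here is
 * illustrative (the helper name, the chosen offsets and the lack of error
 * handling): a classic filter built with BPF_STMT()/BPF_JUMP() from
 * uapi/linux/filter.h is attached with SO_ATTACH_FILTER, accepting only
 * IPv4/UDP frames. A descriptor from socket(AF_PACKET, SOCK_RAW, ...) would
 * typically be passed as fd.
 */
#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <netinet/in.h>

static int attach_udp_only_filter(int fd)
{
	struct sock_filter code[] = {
		BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),		/* A = ethertype */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 3),	/* not IPv4 -> drop */
		BPF_STMT(BPF_LD  | BPF_B | BPF_ABS, 23),		/* A = IP protocol */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),	/* not UDP -> drop */
		BPF_STMT(BPF_RET | BPF_K, 0xffff),			/* accept packet */
		BPF_STMT(BPF_RET | BPF_K, 0),				/* drop packet */
	};
	struct sock_fprog fprog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
}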
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_COMMON_H #define _NF_CONNTRACK_COMMON_H #include <linux/atomic.h> #include <uapi/linux/netfilter/nf_conntrack_common.h> struct ip_conntrack_stat { unsigned int found; unsigned int invalid; unsigned int insert; unsigned int insert_failed; unsigned int clash_resolve; unsigned int drop; unsigned int early_drop; unsigned int error; unsigned int expect_new; unsigned int expect_create; unsigned int expect_delete; unsigned int search_restart; }; #define NFCT_INFOMASK 7UL #define NFCT_PTRMASK ~(NFCT_INFOMASK) struct nf_conntrack { atomic_t use; }; void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) { if (nfct && atomic_dec_and_test(&nfct->use)) nf_conntrack_destroy(nfct); } static inline void nf_conntrack_get(struct nf_conntrack *nfct) { if (nfct) atomic_inc(&nfct->use); } #endif /* _NF_CONNTRACK_COMMON_H */
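/*
 * Hedged sketch (not part of the header above) of the intended usage pattern
 * for the nf_conntrack reference counter: whoever stores a pointer to the
 * conntrack entry (for example via skb->_nfct) takes a reference with
 * nf_conntrack_get() and drops it with nf_conntrack_put() when the pointer
 * goes away; the final put invokes nf_conntrack_destroy(). The two helpers
 * below are hypothetical and only illustrate the pairing.
 */
static inline void example_share_conntrack(struct nf_conntrack *nfct,
					   struct nf_conntrack **slot)
{
	nf_conntrack_get(nfct);		/* new reference for the new holder */
	*slot = nfct;
}

static inline void example_unshare_conntrack(struct nf_conntrack **slot)
{
	nf_conntrack_put(*slot);	/* last put calls nf_conntrack_destroy() */
	*slot = NULL;
}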
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SEQ_FILE_H #define _LINUX_SEQ_FILE_H #include <linux/types.h> #include <linux/string.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/fs.h> #include <linux/cred.h> struct seq_operations; struct seq_file { char *buf; size_t size; size_t from; size_t count; size_t pad_until; loff_t index; loff_t read_pos; struct mutex lock; const struct seq_operations *op; int poll_event; const struct file *file; void *private; }; struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); }; #define SEQ_SKIP 1 /** * seq_has_overflowed - check if the buffer has overflowed * @m: the seq_file handle * * seq_files have a buffer which may overflow. When this happens a larger * buffer is reallocated and all the data will be printed again. * The overflow state is true when m->count == m->size. * * Returns true if the buffer received more than it can hold. */ static inline bool seq_has_overflowed(struct seq_file *m) { return m->count == m->size; } /** * seq_get_buf - get buffer to write arbitrary data to * @m: the seq_file handle * @bufp: the beginning of the buffer is stored here * * Return the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_get_buf(struct seq_file *m, char **bufp) { BUG_ON(m->count > m->size); if (m->count < m->size) *bufp = m->buf + m->count; else *bufp = NULL; return m->size - m->count; } /** * seq_commit - commit data to the buffer * @m: the seq_file handle * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired * by seq_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_commit(struct seq_file *m, int num) { if (num < 0) { m->count = m->size; } else { BUG_ON(m->count + num > m->size); m->count += num; } } /** * seq_setwidth - set padding width * @m: the seq_file handle * @size: the max number of bytes to pad. * * Call seq_setwidth() for setting max width, then call seq_printf() etc. and * finally call seq_pad() to pad the remaining bytes.
*/ static inline void seq_setwidth(struct seq_file *m, size_t size) { m->pad_until = m->count + size; } void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 0) void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *s); void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width); void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); int seq_path(struct seq_file *, const struct path *, const char *); int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #define DEFINE_SEQ_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ int ret = seq_open(file, &__name ## _sops); \ if (!ret && inode->i_private) { \ struct seq_file *seq_f = file->private_data; \ seq_f->private = inode->i_private; \ } \ return ret; \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = seq_release, \ } #define DEFINE_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = single_release, \ } #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, PDE_DATA(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ .proc_open = __name ## _open, \ .proc_read = seq_read, \ .proc_lseek = seq_lseek, \ .proc_release = single_release, \ } static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS return 
seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; #endif } /** * seq_show_options - display mount options with appropriate escapes. * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, can be NULL */ static inline void seq_show_option(struct seq_file *m, const char *name, const char *value) { seq_putc(m, ','); seq_escape(m, name, ",= \t\n\\"); if (value) { seq_putc(m, '='); seq_escape(m, value, ", \t\n\\"); } } /** * seq_show_option_n - display mount options with appropriate escapes * where @value must be a specific length. * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL * @length: the length of @value to display * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ strncpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files */ extern struct list_head *seq_list_start(struct list_head *head, loff_t pos); extern struct list_head *seq_list_start_head(struct list_head *head, loff_t pos); extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); /* * Helpers for iteration over hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head, loff_t *ppos); extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head, loff_t *ppos); /* Helpers for iterating over per-cpu hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos); extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos); void seq_file_init(void); #endif
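/*
 * Illustrative sketch of the single-shot seq_file pattern built on the
 * DEFINE_SHOW_ATTRIBUTE() helper declared above. The "example_stats" name and
 * the debugfs location are hypothetical; the point is that the _show()
 * callback only needs seq_puts()/seq_printf(), while the macro generates the
 * matching _open() wrapper and _fops.
 */
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int example_stats_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example seq_file output\n");
	seq_printf(m, "private=%px\n", m->private);	/* inode->i_private passed via single_open() */
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(example_stats);

static void example_stats_register(void)
{
	/* creates /sys/kernel/debug/example_stats, read via seq_read() */
	debugfs_create_file("example_stats", 0444, NULL, NULL,
			    &example_stats_fops);
}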
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM oom #if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OOM_H #include <linux/tracepoint.h> #include <trace/events/mmflags.h> TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), TP_ARGS(task), TP_STRUCT__entry( __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN ) __field( short, oom_score_adj) ), TP_fast_assign( __entry->pid = task->pid; memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), TP_printk("pid=%d comm=%s oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->oom_score_adj) ); TRACE_EVENT(reclaim_retry_zone, TP_PROTO(struct zoneref *zoneref, int order, unsigned long reclaimable, unsigned long available, unsigned long min_wmark, int no_progress_loops, bool wmark_check), TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check), TP_STRUCT__entry( __field( int, node) __field( int, zone_idx) __field( int, order) __field( unsigned long, reclaimable) __field( unsigned long, available) __field( unsigned long, min_wmark) __field( int, no_progress_loops) __field( bool, wmark_check) ), TP_fast_assign( __entry->node = zone_to_nid(zoneref->zone); __entry->zone_idx = zoneref->zone_idx; __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; __entry->min_wmark = min_wmark; __entry->no_progress_loops = no_progress_loops; __entry->wmark_check = wmark_check; ), TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d", __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE), __entry->order, __entry->reclaimable, __entry->available, __entry->min_wmark, __entry->no_progress_loops, __entry->wmark_check) ); TRACE_EVENT(mark_victim, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(wake_reaper, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(start_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(finish_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(skip_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); #ifdef CONFIG_COMPACTION TRACE_EVENT(compact_retry, TP_PROTO(int order, enum compact_priority priority, enum compact_result result, int retries, int
max_retries, bool ret), TP_ARGS(order, priority, result, retries, max_retries, ret), TP_STRUCT__entry( __field( int, order) __field( int, priority) __field( int, result) __field( int, retries) __field( int, max_retries) __field( bool, ret) ), TP_fast_assign( __entry->order = order; __entry->priority = priority; __entry->result = compact_result_to_feedback(result); __entry->retries = retries; __entry->max_retries = max_retries; __entry->ret = ret; ), TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d", __entry->order, __print_symbolic(__entry->priority, COMPACTION_PRIORITY), __print_symbolic(__entry->result, COMPACTION_FEEDBACK), __entry->retries, __entry->max_retries, __entry->ret) ); #endif /* CONFIG_COMPACTION */ #endif /* This part must be outside protection */ #include <trace/define_trace.h>
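/*
 * Sketch of how the TRACE_EVENT() definitions above are consumed. Each
 * definition expands into a trace_<name>() helper that call sites invoke; the
 * hypothetical function below shows the shape of such a call site for the
 * oom:mark_victim event. At runtime the event can be switched on through
 * tracefs (e.g. /sys/kernel/tracing/events/oom/mark_victim/enable) and the
 * trace output follows the TP_printk() format ("pid=%d").
 */
#define CREATE_TRACE_POINTS	/* defined once, in the .c file that emits the events */
#include <trace/events/oom.h>
#include <linux/sched.h>

static void example_report_victim(struct task_struct *victim)
{
	trace_mark_victim(victim->pid);
}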
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Authentication token and access key management * * Copyright (C) 2004, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/security/keys/core.rst for information on keys/keyrings. */ #ifndef _LINUX_KEY_H #define _LINUX_KEY_H #include <linux/types.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> #include <linux/sysctl.h> #include <linux/rwsem.h> #include <linux/atomic.h> #include <linux/assoc_array.h> #include <linux/refcount.h> #include <linux/time64.h> #ifdef __KERNEL__ #include <linux/uidgid.h> /* key handle serial number */ typedef int32_t key_serial_t; /* key handle permissions mask */ typedef uint32_t key_perm_t; struct key; struct net; #ifdef CONFIG_KEYS #undef KEY_DEBUGGING #define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */ #define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */ #define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */ #define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */ #define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */ #define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */ #define KEY_POS_ALL 0x3f000000 #define KEY_USR_VIEW 0x00010000 /* user permissions...
*/ #define KEY_USR_READ 0x00020000 #define KEY_USR_WRITE 0x00040000 #define KEY_USR_SEARCH 0x00080000 #define KEY_USR_LINK 0x00100000 #define KEY_USR_SETATTR 0x00200000 #define KEY_USR_ALL 0x003f0000 #define KEY_GRP_VIEW 0x00000100 /* group permissions... */ #define KEY_GRP_READ 0x00000200 #define KEY_GRP_WRITE 0x00000400 #define KEY_GRP_SEARCH 0x00000800 #define KEY_GRP_LINK 0x00001000 #define KEY_GRP_SETATTR 0x00002000 #define KEY_GRP_ALL 0x00003f00 #define KEY_OTH_VIEW 0x00000001 /* third party permissions... */ #define KEY_OTH_READ 0x00000002 #define KEY_OTH_WRITE 0x00000004 #define KEY_OTH_SEARCH 0x00000008 #define KEY_OTH_LINK 0x00000010 #define KEY_OTH_SETATTR 0x00000020 #define KEY_OTH_ALL 0x0000003f #define KEY_PERM_UNDEF 0xffffffff /* * The permissions required on a key that we're looking up. */ enum key_need_perm { KEY_NEED_UNSPECIFIED, /* Needed permission unspecified */ KEY_NEED_VIEW, /* Require permission to view attributes */ KEY_NEED_READ, /* Require permission to read content */ KEY_NEED_WRITE, /* Require permission to update / modify */ KEY_NEED_SEARCH, /* Require permission to search (keyring) or find (key) */ KEY_NEED_LINK, /* Require permission to link */ KEY_NEED_SETATTR, /* Require permission to change attributes */ KEY_NEED_UNLINK, /* Require permission to unlink key */ KEY_SYSADMIN_OVERRIDE, /* Special: override by CAP_SYS_ADMIN */ KEY_AUTHTOKEN_OVERRIDE, /* Special: override by possession of auth token */ KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ }; struct seq_file; struct user_struct; struct signal_struct; struct cred; struct key_type; struct key_owner; struct key_tag; struct keyring_list; struct keyring_name; struct key_tag { struct rcu_head rcu; refcount_t usage; bool removed; /* T when subject removed */ }; struct keyring_index_key { /* [!] If this structure is altered, the union in struct key must change too! */ unsigned long hash; /* Hash value */ union { struct { #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ u16 desc_len; char desc[sizeof(long) - 2]; /* First few chars of description */ #else char desc[sizeof(long) - 2]; /* First few chars of description */ u16 desc_len; #endif }; unsigned long x; }; struct key_type *type; struct key_tag *domain_tag; /* Domain of operation */ const char *description; }; union key_payload { void __rcu *rcu_data0; void *data[4]; }; /*****************************************************************************/ /* * key reference with possession attribute handling * * NOTE! key_ref_t is a typedef'd pointer to a type that is not actually * defined. This is because we abuse the bottom bit of the reference to carry a * flag to indicate whether the calling process possesses that key in one of * its keyrings. * * the key_ref_t has been made a separate type so that the compiler can reject * attempts to dereference it without proper conversion. 
* * the three functions are used to assemble and disassemble references */ typedef struct __key_reference_with_attributes *key_ref_t; static inline key_ref_t make_key_ref(const struct key *key, bool possession) { return (key_ref_t) ((unsigned long) key | possession); } static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) { return (struct key *) ((unsigned long) key_ref & ~1UL); } static inline bool is_key_possessed(const key_ref_t key_ref) { return (unsigned long) key_ref & 1UL; } typedef int (*key_restrict_link_func_t)(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key); struct key_restriction { key_restrict_link_func_t check; struct key *key; struct key_type *keytype; }; enum key_state { KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE, /* Positively instantiated */ }; /*****************************************************************************/ /* * authentication token / access credential / keyring * - types of key include: * - keyrings * - disk encryption IDs * - Kerberos TGTs and tickets */ struct key { refcount_t usage; /* number of references */ key_serial_t serial; /* key serial number */ union { struct list_head graveyard_link; struct rb_node serial_node; }; #ifdef CONFIG_KEY_NOTIFICATIONS struct watch_list *watchers; /* Entities watching this key for changes */ #endif struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ union { time64_t expiry; /* time at which key expires (or 0) */ time64_t revoked_at; /* time at which key was revoked */ }; time64_t last_used_at; /* last time used for LRU keyring discard */ kuid_t uid; kgid_t gid; key_perm_t perm; /* access permissions */ unsigned short quotalen; /* length added to quota */ unsigned short datalen; /* payload data length * - may not match RCU dereferenced payload * - payload should contain own length */ short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; #define KEY_DEBUG_MAGIC 0x18273645u #endif unsigned long flags; /* status flags (change with bitops) */ #define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ #define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ #define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ #define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ #define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ #define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria * - it should be a printable string * - eg: for krb5 AFS, this might be "afs@REDHAT.COM" */ union { struct keyring_index_key index_key; struct { unsigned long hash; unsigned long len_desc; struct key_type *type; /* type of key */ struct key_tag *domain_tag; /* Domain of operation */ char *description; }; }; /* key data * - this is used to hold the data actually used in cryptography or * whatever */ union { union key_payload payload; struct { /* Keyring bits */ struct list_head name_link; struct assoc_array keys; }; }; /* This is set on a 
keyring to restrict the addition of a link to a key * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring * keys. Only set this value using keyring_restrict(), keyring_alloc(), * or key_alloc(). * * This is intended for use with rings of trusted keys whereby addition * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION * overrides this, allowing the kernel to add extra keys without * restriction. */ struct key_restriction *restrict_link; }; extern struct key *key_alloc(struct key_type *type, const char *desc, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ #define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ #define KEY_ALLOC_SET_KEEP 0x0020 /* Set the KEEP flag on the key/keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); extern bool key_put_tag(struct key_tag *tag); extern void key_remove_domain(struct key_tag *domain_tag); static inline struct key *__key_get(struct key *key) { refcount_inc(&key->usage); return key; } static inline struct key *key_get(struct key *key) { return key ? __key_get(key) : key; } static inline void key_ref_put(key_ref_t key_ref) { key_put(key_ref_to_ptr(key_ref)); } extern struct key *request_key_tag(struct key_type *type, const char *description, struct key_tag *domain_tag, const char *callout_info); extern struct key *request_key_rcu(struct key_type *type, const char *description, struct key_tag *domain_tag); extern struct key *request_key_with_auxdata(struct key_type *type, const char *description, struct key_tag *domain_tag, const void *callout_info, size_t callout_len, void *aux); /** * request_key - Request a key and wait for construction * @type: Type of key. * @description: The searchable description of the key. * @callout_info: The data to pass to the instantiation upcall (or NULL). * * As for request_key_tag(), but with the default global domain tag. */ static inline struct key *request_key(struct key_type *type, const char *description, const char *callout_info) { return request_key_tag(type, description, NULL, callout_info); } #ifdef CONFIG_NET /** * request_key_net - Request a key for a net namespace and wait for construction * @type: Type of key. * @description: The searchable description of the key. * @net: The network namespace that is the key's domain of operation. * @callout_info: The data to pass to the instantiation upcall (or NULL). * * As for request_key() except that it does not add the returned key to a * keyring if found, new keys are always allocated in the user's quota, the * callout_info must be a NUL-terminated string and no auxiliary data can be * passed. Only keys that operate the specified network namespace are used. * * Furthermore, it then works as wait_for_key_construction() to wait for the * completion of keys undergoing construction with a non-interruptible wait. 
*/ #define request_key_net(type, description, net, callout_info) \ request_key_tag(type, description, net->key_domain, callout_info); /** * request_key_net_rcu - Request a key for a net namespace under RCU conditions * @type: Type of key. * @description: The searchable description of the key. * @net: The network namespace that is the key's domain of operation. * * As for request_key_rcu() except that only keys that operate the specified * network namespace are used. */ #define request_key_net_rcu(type, description, net) \ request_key_rcu(type, description, net->key_domain); #endif /* CONFIG_NET */ extern int wait_for_key_construction(struct key *key, bool intr); extern int key_validate(const struct key *key); extern key_ref_t key_create_or_update(key_ref_t keyring, const char *type, const char *description, const void *payload, size_t plen, key_perm_t perm, unsigned long flags); extern int key_update(key_ref_t key, const void *payload, size_t plen); extern int key_link(struct key *keyring, struct key *key); extern int key_move(struct key *key, struct key *from_keyring, struct key *to_keyring, unsigned int flags); extern int key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link, struct key *dest); extern int restrict_link_reject(struct key *keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key); extern int keyring_clear(struct key *keyring); extern key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description, bool recurse); extern int keyring_add_key(struct key *keyring, struct key *key); extern int keyring_restrict(key_ref_t keyring, const char *type, const char *restriction); extern struct key *key_lookup(key_serial_t id); static inline key_serial_t key_serial(const struct key *key) { return key ? key->serial : 0; } extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, enum key_need_perm need_perm); extern void key_free_user_ns(struct user_namespace *); static inline short key_read_state(const struct key *key) { /* Barrier versus mark_key_instantiated(). */ return smp_load_acquire(&key->state); } /** * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. 
*/ static inline bool key_is_positive(const struct key *key) { return key_read_state(key) == KEY_IS_POSITIVE; } static inline bool key_is_negative(const struct key *key) { return key_read_state(key) < 0; } #define dereference_key_rcu(KEY) \ (rcu_dereference((KEY)->payload.rcu_data0)) #define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ do { \ rcu_assign_pointer((KEY)->payload.rcu_data0, (PAYLOAD)); \ } while (0) #ifdef CONFIG_SYSCTL extern struct ctl_table key_sysctls[]; #endif /* * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); extern void key_fsuid_changed(struct cred *new_cred); extern void key_fsgid_changed(struct cred *new_cred); extern void key_init(void); #else /* CONFIG_KEYS */ #define key_validate(k) 0 #define key_serial(k) 0 #define key_get(k) ({ NULL; }) #define key_revoke(k) do { } while(0) #define key_invalidate(k) do { } while(0) #define key_put(k) do { } while(0) #define key_ref_put(k) do { } while(0) #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 #define key_fsuid_changed(c) do { } while(0) #define key_fsgid_changed(c) do { } while(0) #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ #endif /* _LINUX_KEY_H */
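/*
 * Example (not part of the original header): a minimal sketch of the
 * lookup/refcount pattern built from the helpers declared above.  The
 * "user" key type and the description string are illustrative assumptions;
 * real callers decide how to interpret the payload they find.
 */
#include <linux/key.h>
#include <linux/err.h>
#include <keys/user-type.h>

static int example_peek_key(void)
{
	struct key *key;
	const void *payload;
	int ret = 0;

	/* May upcall to userspace to construct the key, so this can sleep. */
	key = request_key(&key_type_user, "example:desc", NULL);
	if (IS_ERR(key))
		return PTR_ERR(key);

	/* Only positively instantiated keys carry a usable payload. */
	if (!key_is_positive(key)) {
		ret = -ENOKEY;
		goto out_put;
	}

	rcu_read_lock();
	payload = dereference_key_rcu(key);
	/* ... copy out whatever is needed while still under RCU ... */
	rcu_read_unlock();

out_put:
	key_put(key);		/* drop the reference request_key() returned */
	return ret;
}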
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_COMPAT_H #define _LINUX_COMPAT_H /* * These are the type definitions for the architecture specific * syscall compatibility layer. */ #include <linux/types.h> #include <linux/time.h> #include <linux/stat.h> #include <linux/param.h> /* for HZ */ #include <linux/sem.h> #include <linux/socket.h> #include <linux/if.h> #include <linux/fs.h> #include <linux/aio_abi.h> /* for aio_context_t */ #include <linux/uaccess.h> #include <linux/unistd.h> #include <asm/compat.h> #ifdef CONFIG_COMPAT #include <asm/siginfo.h> #include <asm/signal.h> #endif #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER /* * It may be useful for an architecture to override the definitions of the * COMPAT_SYSCALL_DEFINE0 and COMPAT_SYSCALL_DEFINEx() macros, in particular * to use a different calling convention for syscalls. To allow for that, + the prototypes for the compat_sys_*() functions below will *not* be included * if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled. */ #include <asm/syscall_wrapper.h> #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ #ifndef COMPAT_USE_64BIT_TIME #define COMPAT_USE_64BIT_TIME 0 #endif #ifndef __SC_DELOUSE #define __SC_DELOUSE(t,v) ((__force t)(unsigned long)(v)) #endif #ifndef COMPAT_SYSCALL_DEFINE0 #define COMPAT_SYSCALL_DEFINE0(name) \ asmlinkage long compat_sys_##name(void); \ ALLOW_ERROR_INJECTION(compat_sys_##name, ERRNO); \ asmlinkage long compat_sys_##name(void) #endif /* COMPAT_SYSCALL_DEFINE0 */ #define COMPAT_SYSCALL_DEFINE1(name, ...) \ COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE2(name, ...) \ COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE3(name, ...) \ COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE4(name, ...) \ COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE5(name, ...) \ COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE6(name, ...) \ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) /* * The asmlinkage stub is aliased to a function named __se_compat_sys_*() which * sign-extends 32-bit ints to longs whenever needed. The actual work is * done within __do_compat_sys_*(). */ #ifndef COMPAT_SYSCALL_DEFINEx #define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ return ret; \ } \ __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ struct compat_iovec { compat_uptr_t iov_base; compat_size_t iov_len; }; #ifdef CONFIG_COMPAT #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif #ifndef compat_sigaltstack /* we'll need that for MIPS */ typedef struct compat_sigaltstack { compat_uptr_t ss_sp; int ss_flags; compat_size_t ss_size; } compat_stack_t; #endif #ifndef COMPAT_MINSIGSTKSZ #define COMPAT_MINSIGSTKSZ MINSIGSTKSZ #endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) typedef __compat_uid32_t compat_uid_t; typedef __compat_gid32_t compat_gid_t; struct compat_sel_arg_struct; struct rusage; struct old_itimerval32; struct compat_tms { compat_clock_t tms_utime; compat_clock_t tms_stime; compat_clock_t tms_cutime; compat_clock_t tms_cstime; }; #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize); struct compat_sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION compat_uptr_t sa_handler; compat_ulong_t sa_flags; #else compat_uint_t sa_flags; compat_uptr_t sa_handler; #endif #ifdef __ARCH_HAS_SA_RESTORER compat_uptr_t sa_restorer; #endif compat_sigset_t sa_mask __packed; }; typedef union compat_sigval { compat_int_t sival_int; compat_uptr_t sival_ptr; } compat_sigval_t; typedef struct compat_siginfo { int si_signo; #ifndef __ARCH_HAS_SWAPPED_SIGINFO int si_errno; int si_code; #else int si_code; int si_errno; #endif union { int _pad[128/sizeof(int) - 3]; /* kill() */ struct { compat_pid_t _pid; /* sender's pid */ __compat_uid32_t _uid; /* sender's uid */ } _kill; /* POSIX.1b timers */ struct { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ } _timer; /* POSIX.1b signals */ struct { compat_pid_t _pid; /* sender's pid */ __compat_uid32_t _uid; /* sender's uid */ compat_sigval_t _sigval; } _rt; /* SIGCHLD */ struct { compat_pid_t _pid; /* which child */ __compat_uid32_t _uid; /* sender's uid */ int _status; /* exit code */ compat_clock_t _utime; compat_clock_t _stime; } _sigchld; #ifdef CONFIG_X86_X32_ABI /* SIGCHLD (x32 version) */ struct { compat_pid_t _pid; /* which child */ __compat_uid32_t _uid; /* sender's uid */ int _status; /* exit code */ compat_s64 _utime; compat_s64 _stime; } _sigchld_x32; #endif /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { compat_uptr_t _addr; /* faulting insn/memory ref. */ #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif #define __COMPAT_ADDR_BND_PKEY_PAD (__alignof__(compat_uptr_t) < sizeof(short) ? 
\ sizeof(short) : __alignof__(compat_uptr_t)) union { /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO */ short int _addr_lsb; /* Valid LSB of the reported address. */ /* used when si_code=SEGV_BNDERR */ struct { char _dummy_bnd[__COMPAT_ADDR_BND_PKEY_PAD]; compat_uptr_t _lower; compat_uptr_t _upper; } _addr_bnd; /* used when si_code=SEGV_PKUERR */ struct { char _dummy_pkey[__COMPAT_ADDR_BND_PKEY_PAD]; u32 _pkey; } _addr_pkey; }; } _sigfault; /* SIGPOLL */ struct { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; struct { compat_uptr_t _call_addr; /* calling user insn */ int _syscall; /* triggering system call number */ unsigned int _arch; /* AUDIT_ARCH_* of syscall */ } _sigsys; } _sifields; } compat_siginfo_t; struct compat_rlimit { compat_ulong_t rlim_cur; compat_ulong_t rlim_max; }; struct compat_rusage { struct old_timeval32 ru_utime; struct old_timeval32 ru_stime; compat_long_t ru_maxrss; compat_long_t ru_ixrss; compat_long_t ru_idrss; compat_long_t ru_isrss; compat_long_t ru_minflt; compat_long_t ru_majflt; compat_long_t ru_nswap; compat_long_t ru_inblock; compat_long_t ru_oublock; compat_long_t ru_msgsnd; compat_long_t ru_msgrcv; compat_long_t ru_nsignals; compat_long_t ru_nvcsw; compat_long_t ru_nivcsw; }; extern int put_compat_rusage(const struct rusage *, struct compat_rusage __user *); struct compat_siginfo; struct __compat_aio_sigset; struct compat_dirent { u32 d_ino; compat_off_t d_off; u16 d_reclen; char d_name[256]; }; struct compat_ustat { compat_daddr_t f_tfree; compat_ino_t f_tinode; char f_fname[6]; char f_fpack[6]; }; #define COMPAT_SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3) typedef struct compat_sigevent { compat_sigval_t sigev_value; compat_int_t sigev_signo; compat_int_t sigev_notify; union { compat_int_t _pad[COMPAT_SIGEV_PAD_SIZE]; compat_int_t _tid; struct { compat_uptr_t _function; compat_uptr_t _attribute; } _sigev_thread; } _sigev_un; } compat_sigevent_t; struct compat_ifmap { compat_ulong_t mem_start; compat_ulong_t mem_end; unsigned short base_addr; unsigned char irq; unsigned char dma; unsigned char port; }; struct compat_if_settings { unsigned int type; /* Type of physical device or protocol */ unsigned int size; /* Size of the data allocated by the caller */ compat_uptr_t ifs_ifsu; /* union of pointers */ }; struct compat_ifreq { union { char ifrn_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ } ifr_ifrn; union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct sockaddr ifru_netmask; struct sockaddr ifru_hwaddr; short ifru_flags; compat_int_t ifru_ivalue; compat_int_t ifru_mtu; struct compat_ifmap ifru_map; char ifru_slave[IFNAMSIZ]; /* Just fits the size */ char ifru_newname[IFNAMSIZ]; compat_caddr_t ifru_data; struct compat_if_settings ifru_settings; } ifr_ifru; }; struct compat_ifconf { compat_int_t ifc_len; /* size of buffer */ compat_caddr_t ifcbuf; }; struct compat_robust_list { compat_uptr_t next; }; struct compat_robust_list_head { struct compat_robust_list list; compat_long_t futex_offset; compat_uptr_t list_op_pending; }; #ifdef CONFIG_COMPAT_OLD_SIGACTION struct compat_old_sigaction { compat_uptr_t sa_handler; compat_old_sigset_t sa_mask; compat_ulong_t sa_flags; compat_uptr_t sa_restorer; }; #endif struct compat_keyctl_kdf_params { compat_uptr_t hashname; compat_uptr_t otherinfo; __u32 otherinfolen; __u32 __spare[8]; }; struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; struct compat_linux_dirent; struct linux_dirent64; struct compat_msghdr; struct compat_mmsghdr; struct compat_sysinfo; struct compat_sysctl_args; struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from); int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from); int __copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); #ifndef copy_siginfo_to_user32 #define copy_siginfo_to_user32 __copy_siginfo_to_user32 #endif int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* * Defined inline such that size can be compile time constant, which avoids * CONFIG_HARDENED_USERCOPY complaining about copies from task_struct */ static inline int put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsigned int size) { /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ #ifdef __BIG_ENDIAN compat_sigset_t v; switch (_NSIG_WORDS) { case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; fallthrough; case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; fallthrough; case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; fallthrough; case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; } return copy_to_user(compat, &v, size) ? -EFAULT : 0; #else return copy_to_user(compat, set, size) ? 
-EFAULT : 0; #endif } extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); struct epoll_event; /* fortunately, this one is fixed-layout */ extern void __user *compat_alloc_user_space(unsigned long len); int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); #define unsafe_compat_save_altstack(uss, sp, label) do { \ compat_stack_t __user *__uss = uss; \ struct task_struct *t = current; \ unsafe_put_user(ptr_to_compat((void __user *)t->sas_ss_sp), \ &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ if (t->sas_ss_flags & SS_AUTODISARM) \ sas_ss_reset(t); \ } while (0); /* * These syscall function prototypes are kept in the same order as * include/uapi/asm-generic/unistd.h. Deprecated or obsolete system calls * go below. * * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. * These functions should not be called elsewhere from kernel code. * * As the syscall calling convention may be different from the default * for architectures overriding the syscall calling convention, do not * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled. */ #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, compat_long_t min_nr, compat_long_t nr, struct io_event __user *events, struct old_timespec32 __user *timeout, const struct __compat_aio_sigset __user *usig); asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, compat_long_t min_nr, compat_long_t nr, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); /* fs/cookies.c */ asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); /* fs/eventpoll.c */ asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, compat_ulong_t arg); asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, compat_ulong_t arg); /* fs/ioctl.c */ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, compat_ulong_t arg); /* fs/open.c */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf); asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); /* No generic prototype for truncate64, ftruncate64, fallocate */ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); /* 
fs/readdir.c */ asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); /* fs/read_write.c */ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); /* No generic prototype for pread64 and pwrite64 */ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos); #endif #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 asmlinkage long compat_sys_pwritev64(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos); #endif /* fs/sendfile.c */ asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, compat_size_t count); /* fs/select.c */ asmlinkage long compat_sys_pselect6_time32(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timespec32 __user *tsp, void __user *sig); asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct __kernel_timespec __user *tsp, void __user *sig); asmlinkage long compat_sys_ppoll_time32(struct pollfd __user *ufds, unsigned int nfds, struct old_timespec32 __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); asmlinkage long compat_sys_ppoll_time64(struct pollfd __user *ufds, unsigned int nfds, struct __kernel_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); /* fs/signalfd.c */ asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); /* fs/stat.c */ asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user *filename, struct compat_stat __user *statbuf, int flag); asmlinkage long compat_sys_newfstat(unsigned int fd, struct compat_stat __user *statbuf); /* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */ /* kernel/exit.c */ asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, struct compat_rusage __user *); /* kernel/futex.c */ asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr); /* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, struct old_itimerval32 __user *it); asmlinkage long compat_sys_setitimer(int which, struct old_itimerval32 __user *in, struct old_itimerval32 __user *out); /* kernel/kexec.c */ asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, compat_ulong_t nr_segments, struct compat_kexec_segment __user *, compat_ulong_t flags); /* kernel/posix-timers.c */ asmlinkage long compat_sys_timer_create(clockid_t which_clock, struct compat_sigevent __user *timer_event_spec, timer_t __user *created_timer_id); /* kernel/ptrace.c */ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data); /* kernel/sched/core.c */ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t 
pid, unsigned int len, compat_ulong_t __user *user_mask_ptr); asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, compat_ulong_t __user *user_mask_ptr); /* kernel/signal.c */ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize); #ifndef CONFIG_ODD_RT_SIGACTION asmlinkage long compat_sys_rt_sigaction(int, const struct compat_sigaction __user *, struct compat_sigaction __user *, compat_size_t); #endif asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_sigset_t __user *oset, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigtimedwait_time32(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct old_timespec32 __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct __kernel_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); /* No generic prototype for rt_sigreturn */ /* kernel/sys.c */ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); asmlinkage long compat_sys_getrlimit(unsigned int resource, struct compat_rlimit __user *rlim); asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim); asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); /* kernel/time.c */ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); /* ipc/mqueue.c */ asmlinkage long compat_sys_mq_open(const char __user *u_name, int oflag, compat_mode_t mode, struct compat_mq_attr __user *u_attr); asmlinkage long compat_sys_mq_notify(mqd_t mqdes, const struct compat_sigevent __user *u_notification); asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, const struct compat_mq_attr __user *u_mqstat, struct compat_mq_attr __user *u_omqstat); /* ipc/msg.c */ asmlinkage long compat_sys_msgctl(int first, int second, void __user *uptr); asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); /* ipc/sem.c */ asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); /* ipc/shm.c */ asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr); asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); /* net/socket.c */ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned flags, struct sockaddr __user *addr, int __user *addrlen); asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); /* mm/filemap.c: No generic prototype for readahead */ /* security/keys/keyctl.c */ asmlinkage long compat_sys_keyctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5); /* 
arch/example/kernel/sys_example.c */ asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp); /* mm/fadvise.c: No generic prototype for fadvise64_64 */ /* mm/, CONFIG_MMU only */ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, compat_ulong_t mode, compat_ulong_t __user *nmask, compat_ulong_t maxnode, compat_ulong_t flags); asmlinkage long compat_sys_get_mempolicy(int __user *policy, compat_ulong_t __user *nmask, compat_ulong_t maxnode, compat_ulong_t addr, compat_ulong_t flags); asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, compat_ulong_t maxnode); asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, const compat_ulong_t __user *new_nodes); asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, __u32 __user *pages, const int __user *nodes, int __user *status, int flags); asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct __kernel_timespec __user *timeout); asmlinkage long compat_sys_recvmmsg_time32(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct old_timespec32 __user *timeout); asmlinkage long compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options, struct compat_rusage __user *ru); asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp, int flags); asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 asmlinkage long compat_sys_preadv64v2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags); #endif #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 asmlinkage long compat_sys_pwritev64v2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags); #endif /* * Deprecated system calls which are still defined in * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch */ /* __ARCH_WANT_SYSCALL_NO_AT */ asmlinkage long compat_sys_open(const char __user *filename, int flags, umode_t mode); /* __ARCH_WANT_SYSCALL_NO_FLAGS */ asmlinkage long compat_sys_signalfd(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); /* __ARCH_WANT_SYSCALL_OFF_T */ asmlinkage long compat_sys_newstat(const char __user *filename, struct compat_stat __user *statbuf); asmlinkage long compat_sys_newlstat(const char __user *filename, struct compat_stat __user *statbuf); /* __ARCH_WANT_SYSCALL_DEPRECATED */ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timeval32 __user 
*tvp); asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); /* obsolete: fs/readdir.c */ asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *, unsigned int count); /* obsolete: fs/select.c */ asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); /* obsolete: ipc */ asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); /* obsolete: kernel/signal.c */ #ifdef __ARCH_WANT_SYS_SIGPENDING asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); #endif #ifdef __ARCH_WANT_SYS_SIGPROCMASK asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, compat_old_sigset_t __user *oset); #endif #ifdef CONFIG_COMPAT_OLD_SIGACTION asmlinkage long compat_sys_sigaction(int sig, const struct compat_old_sigaction __user *act, struct compat_old_sigaction __user *oact); #endif /* obsolete: net/socket.c */ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which * they aren't the same question, arch code can override in_compat_syscall. */ #ifndef in_compat_syscall static inline bool in_compat_syscall(void) { return is_compat_task(); } #endif /** * ns_to_old_timeval32 - Compat version of ns_to_timeval * @nsec: the nanoseconds value to be converted * * Returns the old_timeval32 representation of the nsec parameter. */ static inline struct old_timeval32 ns_to_old_timeval32(s64 nsec) { struct __kernel_old_timeval tv; struct old_timeval32 ctv; tv = ns_to_kernel_old_timeval(nsec); ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; return ctv; } /* * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz()) * directly. Instead, use one of the functions which work equivalently, such * as the kcompat_sys_xyzyyz() functions prototyped below. */ int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf); int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf); #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) /* Ensure no one redefines in_compat_syscall() under !CONFIG_COMPAT */ #define in_compat_syscall in_compat_syscall static inline bool in_compat_syscall(void) { return false; } #endif /* CONFIG_COMPAT */ /* * Some legacy ABIs like the i386 one use less than natural alignment for 64-bit * types, and will need special compat treatment for that. Most architectures * don't need that special handling even for compat syscalls. */ #ifndef compat_need_64bit_alignment_fixup #define compat_need_64bit_alignment_fixup() false #endif /* * A pointer passed in from user mode. This should not * be used for syscall parameters, just declare them * as pointers because the syscall entry code will have * appropriately converted them already. */ #ifndef compat_ptr static inline void __user *compat_ptr(compat_uptr_t uptr) { return (void __user *)(unsigned long)uptr; } #endif static inline compat_uptr_t ptr_to_compat(void __user *uptr) { return (u32)(unsigned long)uptr; } #endif /* _LINUX_COMPAT_H */
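/*
 * Example (not part of the original header): the common pattern for an
 * ioctl-style handler that accepts both native and 32-bit callers, using
 * in_compat_syscall() and compat_ptr() from above.  The two argument
 * structures and the handler name are illustrative assumptions only.
 */
#include <linux/compat.h>
#include <linux/uaccess.h>

struct example_args {			/* hypothetical native layout */
	void __user	*buf;
	__u32		len;
};

struct example_args32 {			/* hypothetical 32-bit layout */
	compat_uptr_t	buf;
	__u32		len;
};

static int example_get_args(struct example_args *args, void __user *uarg)
{
	if (in_compat_syscall()) {
		struct example_args32 a32;

		if (copy_from_user(&a32, uarg, sizeof(a32)))
			return -EFAULT;
		/* Widen the 32-bit user pointer with compat_ptr(). */
		args->buf = compat_ptr(a32.buf);
		args->len = a32.len;
		return 0;
	}
	return copy_from_user(args, uarg, sizeof(*args)) ? -EFAULT : 0;
}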
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM migrate

#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MIGRATE_H

#include <linux/tracepoint.h>

#define MIGRATE_MODE						\
	EM( MIGRATE_ASYNC,	"MIGRATE_ASYNC")		\
	EM( MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT")		\
	EMe(MIGRATE_SYNC,	"MIGRATE_SYNC")

#define MIGRATE_REASON						\
	EM( MR_COMPACTION,	"compaction")			\
	EM( MR_MEMORY_FAILURE,	"memory_failure")		\
	EM( MR_MEMORY_HOTPLUG,	"memory_hotplug")		\
	EM( MR_SYSCALL,		"syscall_or_cpuset")		\
	EM( MR_MEMPOLICY_MBIND,	"mempolicy_mbind")		\
	EM( MR_NUMA_MISPLACED,	"numa_misplaced")		\
	EMe(MR_CONTIG_RANGE,	"contig_range")

/*
 * First define the enums in the above macros to be exported to userspace
 * via TRACE_DEFINE_ENUM().
 */
#undef EM
#undef EMe
#define EM(a, b)	TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

MIGRATE_MODE
MIGRATE_REASON

/*
 * Now redefine the EM() and EMe() macros to map the enums to the strings
 * that will be printed in the output.
 */
#undef EM
#undef EMe
#define EM(a, b)	{a, b},
#define EMe(a, b)	{a, b}

TRACE_EVENT(mm_migrate_pages,

	TP_PROTO(unsigned long succeeded, unsigned long failed,
		 unsigned long thp_succeeded, unsigned long thp_failed,
		 unsigned long thp_split, enum migrate_mode mode, int reason),

	TP_ARGS(succeeded, failed, thp_succeeded, thp_failed,
		thp_split, mode, reason),

	TP_STRUCT__entry(
		__field(unsigned long,		succeeded)
		__field(unsigned long,		failed)
		__field(unsigned long,		thp_succeeded)
		__field(unsigned long,		thp_failed)
		__field(unsigned long,		thp_split)
		__field(enum migrate_mode,	mode)
		__field(int,			reason)
	),

	TP_fast_assign(
		__entry->succeeded	= succeeded;
		__entry->failed		= failed;
		__entry->thp_succeeded	= thp_succeeded;
		__entry->thp_failed	= thp_failed;
		__entry->thp_split	= thp_split;
		__entry->mode		= mode;
		__entry->reason		= reason;
	),

	TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s",
		__entry->succeeded,
		__entry->failed,
		__entry->thp_succeeded,
		__entry->thp_failed,
		__entry->thp_split,
		__print_symbolic(__entry->mode, MIGRATE_MODE),
		__print_symbolic(__entry->reason, MIGRATE_REASON))
);

#endif /* _TRACE_MIGRATE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
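/*
 * Example (not part of the original header): TRACE_EVENT(mm_migrate_pages, ...)
 * above generates a trace_mm_migrate_pages() helper taking the TP_PROTO
 * arguments in order.  A migration path could report its totals roughly as
 * below; the wrapper function, the zero THP counters and the mode/reason
 * values chosen are illustrative only.
 */
static void example_report_migrate_stats(unsigned long nr_succeeded,
					 unsigned long nr_failed)
{
	trace_mm_migrate_pages(nr_succeeded, nr_failed,
			       0, 0, 0,	/* no THP accounting in this sketch */
			       MIGRATE_ASYNC, MR_COMPACTION);
}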
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SHMEM_FS_H #define __SHMEM_FS_H #include <linux/file.h> #include <linux/swap.h> #include <linux/mempolicy.h> #include <linux/pagemap.h> #include <linux/percpu_counter.h> #include <linux/xattr.h> #include <linux/fs_parser.h> /* inode in-kernel data */ struct shmem_inode_info { spinlock_t lock; unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ struct list_head shrinklist; /* shrinkable hpage inodes */ struct list_head swaplist; /* chain of maybes on swap */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ atomic_t stop_eviction; /* hold when working on inode */ struct inode vfs_inode; }; struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ kuid_t uid; /* Mount uid for root directory */ kgid_t gid; /* Mount gid for root directory */ bool full_inums; /* If i_ino should be uint or ino_t */ ino_t next_ino; /* The next per-sb inode number to use */ ino_t __percpu *ino_batch; /* The next per-cpu inode number to use */ struct mempolicy *mpol; /* default memory policy for mappings */ spinlock_t shrinklist_lock; /* Protects shrinklist */ struct list_head shrinklist; /* List of shinkable inodes */ unsigned long shrinklist_len; /* Length of shrinklist */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) { return container_of(inode, struct shmem_inode_info, vfs_inode); } /* * Functions in mm/shmem.c called directly from elsewhere: */ extern const struct fs_parameter_spec shmem_fs_parameters[]; extern int shmem_init(void); extern int shmem_init_fs_context(struct fs_context *fc); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); #ifdef CONFIG_SHMEM extern bool shmem_mapping(struct address_space *mapping); #else static inline bool shmem_mapping(struct address_space *mapping) { return false; } #endif /* CONFIG_SHMEM */ extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void 
shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse); extern bool shmem_huge_enabled(struct vm_area_struct *vma); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); /* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */ SGP_HUGE, /* like SGP_CACHE, huge pages preferred */ SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; extern int shmem_getpage(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) { return shmem_read_mapping_page_gfp(mapping, index, mapping_gfp_mask(mapping)); } static inline bool shmem_file(struct file *file) { if (!IS_ENABLED(CONFIG_SHMEM)) return false; if (!file || !file->f_mapping) return false; return shmem_mapping(file->f_mapping); } extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); #ifdef CONFIG_SHMEM extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr); #else #define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \ dst_addr) ({ BUG(); 0; }) #endif #endif
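/*
 * Example (not part of the original header): a minimal sketch of the
 * in-kernel shmem usage pattern built from the declarations above.  The
 * file name, 1 MiB size and the probe function are illustrative; error
 * handling is abbreviated.
 */
#include <linux/shmem_fs.h>
#include <linux/err.h>
#include <linux/sizes.h>
#include <linux/mm.h>

static int example_shmem_probe(void)
{
	struct file *file;
	struct page *page;
	int ret = 0;

	/* Anonymous tmpfs-backed file, visible only to the kernel. */
	file = shmem_kernel_file_setup("example-shmem", SZ_1M, 0);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* Find or allocate the page backing offset 0 of that file. */
	page = shmem_read_mapping_page(file->f_mapping, 0);
	if (IS_ERR(page)) {
		ret = PTR_ERR(page);
	} else {
		/* ... use the page contents here ... */
		put_page(page);
	}

	fput(file);
	return ret;
}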
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * pm_wakeup.h - Power management wakeup interface * * Copyright (C) 2008 Alan Stern * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc. */ #ifndef _LINUX_PM_WAKEUP_H #define _LINUX_PM_WAKEUP_H #ifndef _DEVICE_H_ # error "please don't include this file directly" #endif #include <linux/types.h> struct wake_irq; /** * struct wakeup_source - Representation of wakeup sources * * @name: Name of the wakeup source * @id: Wakeup source id * @entry: Wakeup source list entry * @lock: Wakeup source lock * @wakeirq: Optional device specific wakeirq * @timer: Wakeup timer list * @timer_expires: Wakeup timer expiration * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. * @last_time: Monotonic clock when the wakeup source's was touched last time. * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup source was activated. * @relax_count: Number of times the wakeup source was deactivated. * @expire_count: Number of times the wakeup source's timeout has expired. * @wakeup_count: Number of times the wakeup source might abort suspend. * @dev: Struct device for sysfs statistics about the wakeup source. * @active: Status of the wakeup source. * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time. */ struct wakeup_source { const char *name; int id; struct list_head entry; spinlock_t lock; struct wake_irq *wakeirq; struct timer_list timer; unsigned long timer_expires; ktime_t total_time; ktime_t max_time; ktime_t last_time; ktime_t start_prevent_time; ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; unsigned long expire_count; unsigned long wakeup_count; struct device *dev; bool active:1; bool autosleep_enabled:1; }; #define for_each_wakeup_source(ws) \ for ((ws) = wakeup_sources_walk_start(); \ (ws); \ (ws) = wakeup_sources_walk_next((ws))) #ifdef CONFIG_PM_SLEEP /* * Changes to device_may_wakeup take effect on the next pm state change. 
*/ static inline bool device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; } static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && !!dev->power.wakeup; } static inline void device_set_wakeup_path(struct device *dev) { dev->power.wakeup_path = true; } /* drivers/base/power/wakeup.c */ extern struct wakeup_source *wakeup_source_create(const char *name); extern void wakeup_source_destroy(struct wakeup_source *ws); extern void wakeup_source_add(struct wakeup_source *ws); extern void wakeup_source_remove(struct wakeup_source *ws); extern struct wakeup_source *wakeup_source_register(struct device *dev, const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); extern int wakeup_sources_read_lock(void); extern void wakeup_sources_read_unlock(int idx); extern struct wakeup_source *wakeup_sources_walk_start(void); extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); extern int device_init_wakeup(struct device *dev, bool val); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); extern void __pm_relax(struct wakeup_source *ws); extern void pm_relax(struct device *dev); extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard); extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard); #else /* !CONFIG_PM_SLEEP */ static inline void device_set_wakeup_capable(struct device *dev, bool capable) { dev->power.can_wakeup = capable; } static inline bool device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; } static inline struct wakeup_source *wakeup_source_create(const char *name) { return NULL; } static inline void wakeup_source_destroy(struct wakeup_source *ws) {} static inline void wakeup_source_add(struct wakeup_source *ws) {} static inline void wakeup_source_remove(struct wakeup_source *ws) {} static inline struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { return NULL; } static inline void wakeup_source_unregister(struct wakeup_source *ws) {} static inline int device_wakeup_enable(struct device *dev) { dev->power.should_wakeup = true; return 0; } static inline int device_wakeup_disable(struct device *dev) { dev->power.should_wakeup = false; return 0; } static inline int device_set_wakeup_enable(struct device *dev, bool enable) { dev->power.should_wakeup = enable; return 0; } static inline int device_init_wakeup(struct device *dev, bool val) { device_set_wakeup_capable(dev, val); device_set_wakeup_enable(dev, val); return 0; } static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && dev->power.should_wakeup; } static inline void device_set_wakeup_path(struct device *dev) {} static inline void __pm_stay_awake(struct wakeup_source *ws) {} static inline void pm_stay_awake(struct device *dev) {} static inline void __pm_relax(struct wakeup_source *ws) {} static inline void pm_relax(struct device *dev) {} static inline void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard) {} static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard) {} #endif /* !CONFIG_PM_SLEEP */ static inline void __pm_wakeup_event(struct 
wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); } static inline void pm_wakeup_event(struct device *dev, unsigned int msec) { return pm_wakeup_dev_event(dev, msec, false); } static inline void pm_wakeup_hard_event(struct device *dev) { return pm_wakeup_dev_event(dev, 0, true); } #endif /* _LINUX_PM_WAKEUP_H */
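/*
 * A hypothetical driver fragment (the example_* names are invented) showing
 * the intended use of the interface above: probe marks the device
 * wakeup-capable and enables wakeup, and the interrupt handler reports a
 * wakeup event so the PM core can hold off or abort a suspend in progress.
 */
#include <linux/device.h>	/* pulls in linux/pm_wakeup.h */
#include <linux/interrupt.h>

static irqreturn_t example_wake_irq(int irq, void *data)
{
	struct device *dev = data;

	/* Report a wakeup event; msec == 0 means no extra grace period. */
	pm_wakeup_event(dev, 0);
	return IRQ_HANDLED;
}

static int example_probe(struct device *dev)
{
	/* Mark the device wakeup-capable and enable wakeup in one step. */
	return device_init_wakeup(dev, true);
}

static void example_remove(struct device *dev)
{
	device_init_wakeup(dev, false);
}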
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Hash algorithms. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_HASH_H #define _CRYPTO_INTERNAL_HASH_H #include <crypto/algapi.h> #include <crypto/hash.h> struct ahash_request; struct scatterlist; struct crypto_hash_walk { char *data; unsigned int offset; unsigned int alignmask; struct page *pg; unsigned int entrylen; unsigned int total; struct scatterlist *sg; unsigned int flags; }; struct ahash_instance { void (*free)(struct ahash_instance *inst); union { struct { char head[offsetof(struct ahash_alg, halg.base)]; struct crypto_instance base; } s; struct ahash_alg alg; }; }; struct shash_instance { void (*free)(struct shash_instance *inst); union { struct { char head[offsetof(struct shash_alg, base)]; struct crypto_instance base; } s; struct shash_alg alg; }; }; struct crypto_ahash_spawn { struct crypto_spawn base; }; struct crypto_shash_spawn { struct crypto_spawn base; }; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } int crypto_register_ahash(struct ahash_alg *alg); void crypto_unregister_ahash(struct ahash_alg *alg); int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); bool crypto_shash_alg_has_setkey(struct shash_alg *alg); static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_ahash(struct crypto_ahash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct hash_alg_common *crypto_spawn_ahash_alg( struct crypto_ahash_spawn *spawn) { return __crypto_hash_alg_common(spawn->base.alg); } int crypto_register_shash(struct shash_alg *alg); void crypto_unregister_shash(struct shash_alg *alg); int crypto_register_shashes(struct shash_alg *algs, int count); void crypto_unregister_shashes(struct shash_alg *algs, int count); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst); void shash_free_singlespawn_instance(struct shash_instance *inst); int crypto_grab_shash(struct crypto_shash_spawn
*spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct shash_alg *crypto_spawn_shash_alg( struct crypto_shash_spawn *spawn) { return __crypto_shash_alg(spawn->base.alg); } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) { return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, halg); } static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { tfm->reqsize = reqsize; } static inline struct crypto_instance *ahash_crypto_instance( struct ahash_instance *inst) { return &inst->s.base; } static inline struct ahash_instance *ahash_instance( struct crypto_instance *inst) { return container_of(inst, struct ahash_instance, s.base); } static inline struct ahash_instance *ahash_alg_instance( struct crypto_ahash *ahash) { return ahash_instance(crypto_tfm_alg_instance(&ahash->base)); } static inline void *ahash_instance_ctx(struct ahash_instance *inst) { return crypto_instance_ctx(ahash_crypto_instance(inst)); } static inline void ahash_request_complete(struct ahash_request *req, int err) { req->base.complete(&req->base, err); } static inline u32 ahash_request_flags(struct ahash_request *req) { return req->base.flags; } static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { return crypto_enqueue_request(queue, &request->base); } static inline struct ahash_request *ahash_dequeue_request( struct crypto_queue *queue) { return ahash_request_cast(crypto_dequeue_request(queue)); } static inline void *crypto_shash_ctx(struct crypto_shash *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline struct crypto_instance *shash_crypto_instance( struct shash_instance *inst) { return &inst->s.base; } static inline struct shash_instance *shash_instance( struct crypto_instance *inst) { return container_of(inst, struct shash_instance, s.base); } static inline struct shash_instance *shash_alg_instance( struct crypto_shash *shash) { return shash_instance(crypto_tfm_alg_instance(&shash->base)); } static inline void *shash_instance_ctx(struct shash_instance *inst) { return crypto_instance_ctx(shash_crypto_instance(inst)); } static inline struct crypto_shash *crypto_spawn_shash( struct crypto_shash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) { return crypto_tfm_ctx_aligned(&tfm->base); } static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_shash, base); } #endif /* _CRYPTO_INTERNAL_HASH_H */
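/*
 * A rough sketch (not kernel code; the example_* names are invented) of the
 * driver-facing side of these internals. An ahash implementation typically
 * reserves its per-request state with crypto_ahash_set_reqsize() from its
 * init_tfm hook and keeps per-transform data in the area returned by
 * crypto_ahash_ctx(); request handlers then use ahash_request_ctx().
 */
#include <crypto/internal/hash.h>
#include <linux/string.h>

struct example_hash_ctx {		/* per-transform state */
	u8 key[32];
};

struct example_hash_reqctx {		/* per-request state */
	u32 partial[8];
	unsigned int count;
};

static int example_hash_init_tfm(struct crypto_ahash *tfm)
{
	struct example_hash_ctx *ctx = crypto_ahash_ctx(tfm);

	/* Tell the ahash core how much per-request memory to reserve. */
	crypto_ahash_set_reqsize(tfm, sizeof(struct example_hash_reqctx));
	memset(ctx, 0, sizeof(*ctx));
	return 0;
}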
// SPDX-License-Identifier: GPL-2.0 /* * Helper routines for building identity mapping page tables. This is * included by both the compressed kernel and the regular kernel. */ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, unsigned long addr, unsigned long end) { addr &= PMD_MASK; for (; addr < end; addr += PMD_SIZE) { pmd_t *pmd = pmd_page + pmd_index(addr); if (pmd_present(*pmd)) continue; set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag)); } } static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, unsigned long addr, unsigned long end) { unsigned long next; for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; if (info->direct_gbpages) { pud_t pudval; if (pud_present(*pud)) continue; addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; } if (pud_present(*pud)) { pmd = pmd_offset(pud, 0); ident_pmd_init(info, pmd, addr, next); continue; } pmd = (pmd_t *)info->alloc_pgt_page(info->context); if (!pmd) return -ENOMEM; ident_pmd_init(info, pmd, addr, next); set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag)); } return 0; } static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, unsigned long addr, unsigned long end) { unsigned long next; int result; for (; addr < end; addr = next) { p4d_t *p4d = p4d_page + p4d_index(addr); pud_t *pud; next = (addr & P4D_MASK) + P4D_SIZE; if (next > end) next = end; if (p4d_present(*p4d)) { pud = pud_offset(p4d, 0); result = ident_pud_init(info, pud, addr, next); if (result) return result; continue; } pud = (pud_t *)info->alloc_pgt_page(info->context); if (!pud) return -ENOMEM; result = ident_pud_init(info, pud, addr, next); if (result) return result; set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); } return 0; } int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend) { unsigned long addr = pstart + info->offset; unsigned long end = pend + info->offset; unsigned long next; int result; /* Set the default pagetable flags if not supplied */ if (!info->kernpg_flag) info->kernpg_flag = _KERNPG_TABLE; /* Filter out unsupported __PAGE_KERNEL_* bits: */ info->kernpg_flag &= __default_kernel_pte_mask; for (; addr < end; addr = next) { pgd_t *pgd = pgd_page + pgd_index(addr); p4d_t *p4d; next = (addr & PGDIR_MASK) + PGDIR_SIZE; if (next > end) next = end; if (pgd_present(*pgd)) { p4d = p4d_offset(pgd, 0); result = ident_p4d_init(info, p4d, addr, next); if (result) return result; continue; } p4d = (p4d_t *)info->alloc_pgt_page(info->context); if (!p4d) return -ENOMEM; result = ident_p4d_init(info, p4d, addr, next); if (result) return result; if (pgtable_l5_enabled()) { set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); } else { /* * With p4d folded, pgd is equal to p4d. * The pgd entry has to point to the pud page table in this case.
*/ pud_t *pud = pud_offset(p4d, 0); set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); } } return 0; }
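/*
 * A caller-side sketch, loosely modelled on how kexec builds its identity
 * map; the example_* names and the flag choice are illustrative, not taken
 * from this file. The caller supplies a page-table allocator via
 * struct x86_mapping_info and then maps one or more physical ranges
 * one-to-one.
 */
#include <linux/gfp.h>
#include <asm/init.h>
#include <asm/pgtable_types.h>

/* Hand out zeroed pages to be used as intermediate page tables. */
static void *example_alloc_pgt_page(void *context)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static int example_build_identity_map(pgd_t *pgd, unsigned long start,
				      unsigned long end)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= example_alloc_pgt_page,
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
		/* .offset left at 0: virtual address == physical address */
	};

	return kernel_ident_mapping_init(&info, pgd, start, end);
}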
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET Generic infrastructure for Network protocols. * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #ifndef _TIMEWAIT_SOCK_H #define _TIMEWAIT_SOCK_H #include <linux/slab.h> #include <linux/bug.h> #include <net/sock.h> struct timewait_sock_ops { struct kmem_cache *twsk_slab; char *twsk_slab_name; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); void (*twsk_destructor)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { if (sk->sk_prot->twsk_prot->twsk_unique != NULL) return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp); return 0; } static inline void twsk_destructor(struct sock *sk) { if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) sk->sk_prot->twsk_prot->twsk_destructor(sk); } #endif /* _TIMEWAIT_SOCK_H */
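/*
 * A protocol that needs TIME-WAIT handling fills in one of these ops
 * structures and hooks it up through its struct proto (->twsk_prot). The
 * sketch below is modelled on TCP's tcp_timewait_sock_ops but uses invented
 * example_* names.
 */
#include <net/timewait_sock.h>
#include <net/inet_timewait_sock.h>

struct example_timewait_sock {
	struct inet_timewait_sock tw_sk;	/* must come first */
	u32 tw_ts_recent;
};

static int example_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	/* Decide whether a new socket may reuse the 4-tuple held by @sktw. */
	return 0;
}

static void example_twsk_destructor(struct sock *sk)
{
	/* Release per-timewait resources (keys, stats, ...). */
}

static struct timewait_sock_ops example_timewait_sock_ops = {
	.twsk_obj_size	 = sizeof(struct example_timewait_sock),
	.twsk_unique	 = example_twsk_unique,
	.twsk_destructor = example_twsk_destructor,
};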
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SCHED_GENERIC_H #define __NET_SCHED_GENERIC_H #include <linux/netdevice.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/pkt_sched.h> #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/atomic.h> #include <linux/hashtable.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; struct bpf_flow_keys; struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; struct qdisc_rate_table *next; int refcnt; }; enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, }; struct qdisc_size_table { struct rcu_head rcu; struct list_head list; struct tc_sizespec szopts; int refcnt; u16 data[]; }; /* similar to sk_buff_head, but skb->prev pointer is undefined.
*/ struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; __u32 qlen; spinlock_t lock; }; struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 #define TCQ_F_MQROOT 8 #define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for * q->dev_queue : It can test * netif_xmit_frozen_or_stopped() before * dequeueing next packet. * Its true for MQ/MQPRIO slaves, or non * multiqueue device. */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; struct hlist_node hash; u32 handle; u32 parent; struct netdev_queue *dev_queue; struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; int pad; refcount_t refcnt; /* * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; /* for NOLOCK qdisc, true if there are no enqueued skbs */ bool empty; struct rcu_head rcu; /* private data */ long privdata[] ____cacheline_aligned; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return; refcount_inc(&qdisc->refcnt); } /* Intended to be used by unlocked users, when concurrent qdisc release is * possible. */ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return qdisc; if (refcount_inc_not_zero(&qdisc->refcnt)) return qdisc; return NULL; } static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; } static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) return READ_ONCE(qdisc->empty); return !READ_ONCE(qdisc->q.qlen); } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (spin_trylock(&qdisc->seqlock)) goto nolock_empty; /* Paired with smp_mb__after_atomic() to make sure * STATE_MISSED checking is synchronized with clearing * in pfifo_fast_dequeue(). */ smp_mb__before_atomic(); /* If the MISSED flag is set, it means other thread has * set the MISSED flag before second spin_trylock(), so * we can return false here to avoid multi cpus doing * the set_bit() and second spin_trylock() concurrently. 
*/ if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; /* Set the MISSED flag before the second spin_trylock(), * if the second spin_trylock() return false, it means * other cpu holding the lock will do dequeuing for us * or it will see the MISSED flag set after releasing * lock and reschedule the net_tx_action() to do the * dequeuing. */ set_bit(__QDISC_STATE_MISSED, &qdisc->state); /* spin_trylock() only has load-acquire semantic, so use * smp_mb__after_atomic() to ensure STATE_MISSED is set * before doing the second spin_trylock(). */ smp_mb__after_atomic(); /* Retry again in case other CPU may not see the new flag * after it releases the lock at the end of qdisc_run_end(). */ if (!spin_trylock(&qdisc->seqlock)) return false; nolock_empty: WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; } /* Variant of write_seqcount_begin() telling lockdep a trylock * was attempted. */ raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); __netif_schedule(qdisc); } } } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) { return qdisc->flags & TCQ_F_ONETXQUEUE; } static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { #ifdef CONFIG_BQL /* Non-BQL migrated drivers will return 0, too. */ return dql_avail(&txq->dql); #else return 0; #endif } struct Qdisc_class_ops { unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **, struct netlink_ext_ack *extack); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); int (*delete)(struct Qdisc *, unsigned long); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ struct tcf_block * (*tcf_block)(struct Qdisc *sch, unsigned long arg, struct netlink_ext_ack *extack); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); /* rtnetlink specific */ int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*); int (*dump_stats)(struct Qdisc *, unsigned long, struct gnet_dump *); }; /* Qdisc_class_ops flag values */ /* Implements API that doesn't require rtnl lock */ enum qdisc_class_ops_flags { QDISC_CLASS_OPS_DOIT_UNLOCKED = 1, }; struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; int priv_size; unsigned int static_flags; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); int (*init)(struct Qdisc *sch, struct nlattr *arg, struct netlink_ext_ack *extack); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); int (*change)(struct Qdisc *sch, struct nlattr *arg, struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, unsigned int); void 
(*change_real_num_tx)(struct Qdisc *sch, unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); void (*ingress_block_set)(struct Qdisc *sch, u32 block_index); void (*egress_block_set)(struct Qdisc *sch, u32 block_index); u32 (*ingress_block_get)(struct Qdisc *sch); u32 (*egress_block_get)(struct Qdisc *sch); struct module *owner; }; struct tcf_result { union { struct { unsigned long class; u32 classid; }; const struct tcf_proto *goto_tp; /* used in the skb_tc_reinsert function */ struct { bool ingress; struct gnet_stats_queue *qstats; }; }; }; struct tcf_chain; struct tcf_proto_ops { struct list_head head; char kind[IFNAMSIZ]; int (*classify)(struct sk_buff *, const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); void (*destroy)(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); void (*put)(struct tcf_proto *tp, void *f); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, void **, bool, bool, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *); bool (*delete_empty)(struct tcf_proto *tp); void (*walk)(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); void (*hw_add)(struct tcf_proto *tp, void *type_data); void (*hw_del)(struct tcf_proto *tp, void *type_data); void (*bind_class)(void *, u32, unsigned long, void *, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, struct sk_buff *skb, struct tcmsg*, bool); int (*terse_dump)(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held); int (*tmplt_dump)(struct sk_buff *skb, struct net *net, void *tmplt_priv); struct module *owner; int flags; }; /* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags * are expected to implement tcf_proto_ops->delete_empty(), otherwise race * conditions can occur when filters are inserted/deleted simultaneously. */ enum tcf_proto_ops_flags { TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; struct tcf_proto { /* Fast access part */ struct tcf_proto __rcu *next; void __rcu *root; /* called under RCU BH lock*/ int (*classify)(struct sk_buff *, const struct tcf_proto *, struct tcf_result *); __be16 protocol; /* All the rest */ u32 prio; void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; /* Lock protects tcf_proto shared state and can be used by unlocked * classifiers to protect their private data. */ spinlock_t lock; bool deleting; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; }; struct qdisc_skb_cb { struct { unsigned int pkt_len; u16 slave_dev_queue_mapping; u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; u16 mru; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { /* Protects filter_chain. 
*/ struct mutex filter_chain_lock; struct tcf_proto __rcu *filter_chain; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ unsigned int refcnt; unsigned int action_refcnt; bool explicitly_created; bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; struct rcu_head rcu; }; struct tcf_block { /* Lock protects tcf_block and lifetime-management data of chains * attached to the block (refcnt, action_refcnt, explicitly_created). */ struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ u32 classid; /* which class this block belongs to */ refcount_t refcnt; struct net *net; struct Qdisc *q; struct rw_semaphore cb_lock; /* protects cb_list and offload counters */ struct flow_block flow_block; struct list_head owner_list; bool keep_dst; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ struct { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; struct rcu_head rcu; DECLARE_HASHTABLE(proto_destroy_ht, 7); struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; #ifdef CONFIG_PROVE_LOCKING static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); } static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) { return lockdep_is_held(&tp->lock); } #else static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain) { return true; } static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ #define tcf_chain_dereference(p, chain) \ rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) #define tcf_proto_dereference(p, tp) \ rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); BUILD_BUG_ON(sizeof(qcb->data) < sz); } static inline int qdisc_qlen_cpu(const struct Qdisc *q) { return this_cpu_ptr(q->cpu_qstats)->qlen; } static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } static inline int qdisc_qlen_sum(const struct Qdisc *q) { __u32 qlen = q->qstats.qlen; int i; if (qdisc_is_percpu_stats(q)) { for_each_possible_cpu(i) qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; } else { qlen += q->q.qlen; } return qlen; } static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; } static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) { return &qdisc->q.lock; } static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) { struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc); return q; } static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) { return rcu_dereference_bh(qdisc->dev_queue->qdisc); } static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { return qdisc->dev_queue->qdisc_sleeping; } /* The qdisc root lock is a mechanism by which to top level * of a qdisc tree can be locked from any qdisc node in the * forest. This allows changing the configuration of some * aspect of the qdisc tree while blocking out asynchronous * qdisc access in the packet processing paths. * * It is only legal to do this when the root will not change * on us. Otherwise we'll potentially lock the wrong qdisc * root. 
This is enforced by holding the RTNL semaphore, which * all users of this lock accessor must do. */ static inline spinlock_t *qdisc_root_lock(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root(qdisc); ASSERT_RTNL(); return qdisc_lock(root); } static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root_sleeping(qdisc); ASSERT_RTNL(); return qdisc_lock(root); } static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root_sleeping(qdisc); ASSERT_RTNL(); return &root->running; } static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; } static inline void sch_tree_lock(const struct Qdisc *q) { spin_lock_bh(qdisc_root_sleeping_lock(q)); } static inline void sch_tree_unlock(const struct Qdisc *q) { spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; static inline const struct Qdisc_ops * get_default_qdisc_ops(const struct net_device *dev, int ntx) { return ntx < dev->real_num_tx_queues ? default_qdisc_ops : &pfifo_fast_ops; } struct Qdisc_class_common { u32 classid; struct hlist_node hnode; }; struct Qdisc_class_hash { struct hlist_head *hash; unsigned int hashsize; unsigned int hashmask; unsigned int hashelems; }; static inline unsigned int qdisc_class_hash(u32 id, u32 mask) { id ^= id >> 8; id ^= id >> 4; return id & mask; } static inline struct Qdisc_class_common * qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) { struct Qdisc_class_common *cl; unsigned int h; if (!id) return NULL; h = qdisc_class_hash(id, hash->hashmask); hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) return cl; } return NULL; } static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid) { u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY; return (hwtc < netdev_get_num_tc(dev)) ? 
hwtc : -EINVAL; } int qdisc_class_hash_init(struct Qdisc_class_hash *); void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *); void qdisc_class_hash_remove(struct Qdisc_class_hash *, struct Qdisc_class_common *); void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); void dev_qdisc_change_real_num_tx(struct net_device *dev, unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); void dev_deactivate(struct net_device *dev); void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack); #else static inline int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data) { q->flags &= ~TCQ_F_OFFLOADED; return 0; } static inline void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack) { } #endif struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); void qdisc_free(struct Qdisc *qdisc); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid, struct netlink_ext_ack *extack); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); int skb_do_redirect(struct sk_buff *); static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT return skb->tc_at_ingress; #else return false; #endif } static inline bool skb_skip_tc_classify(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT if (skb->tc_skip_classify) { skb->tc_skip_classify = 0; return true; } #endif return false; } /* Reset all TX qdiscs greater than index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } } } /* Are all TX queues of the device empty? */ static inline bool qdisc_all_tx_empty(const struct net_device *dev) { unsigned int i; rcu_read_lock(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); const struct Qdisc *q = rcu_dereference(txq->qdisc); if (!qdisc_is_empty(q)) { rcu_read_unlock(); return false; } } rcu_read_unlock(); return true; } /* Are any of the TX qdiscs changing? 
*/ static inline bool qdisc_tx_changing(const struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping) return true; } return false; } /* Is the device using the noop qdisc on all queues? */ static inline bool qdisc_tx_is_noop(const struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (rcu_access_pointer(txq->qdisc) != &noop_qdisc) return false; } return true; } static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) { return qdisc_skb_cb(skb)->pkt_len; } /* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */ enum net_xmit_qdisc_t { __NET_XMIT_STOLEN = 0x00010000, __NET_XMIT_BYPASS = 0x00020000, }; #ifdef CONFIG_NET_CLS_ACT #define net_xmit_drop_count(e) ((e) & __NET_XMIT_STOLEN ? 0 : 1) #else #define net_xmit_drop_count(e) (1) #endif static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, const struct Qdisc *sch) { #ifdef CONFIG_NET_SCHED struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); if (stab) __qdisc_calculate_pkt_len(skb, stab); #endif } static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); return sch->enqueue(skb, sch, to_free); } static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, __u64 bytes, __u32 packets) { bstats->bytes += bytes; bstats->packets += packets; } static inline void bstats_update(struct gnet_stats_basic_packed *bstats, const struct sk_buff *skb) { _bstats_update(bstats, qdisc_pkt_len(skb), skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); } static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, __u64 bytes, __u32 packets) { u64_stats_update_begin(&bstats->syncp); _bstats_update(&bstats->bstats, bytes, packets); u64_stats_update_end(&bstats->syncp); } static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, const struct sk_buff *skb) { u64_stats_update_begin(&bstats->syncp); bstats_update(&bstats->bstats, skb); u64_stats_update_end(&bstats->syncp); } static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, const struct sk_buff *skb) { bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); } static inline void qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) { bstats_update(&sch->bstats, skb); } static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog -= qdisc_pkt_len(skb); } static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch, const struct sk_buff *skb) { this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog += qdisc_pkt_len(skb); } static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) { this_cpu_dec(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->requeues); } static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { sch->qstats.drops += count; } static inline void qstats_drop_inc(struct 
gnet_stats_queue *qstats) { qstats->drops++; } static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats) { qstats->overlimits++; } static inline void qdisc_qstats_drop(struct Qdisc *sch) { qstats_drop_inc(&sch->qstats); } static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->drops); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) { sch->qstats.overlimits++; } static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) { __u32 qlen = qdisc_qlen_sum(sch); return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); } static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, __u32 *backlog) { struct gnet_stats_queue qstats = { 0 }; __u32 len = qdisc_qlen_sum(sch); __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); *qlen = qstats.qlen; *backlog = qstats.backlog; } static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) { __u32 qlen, backlog; qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); qdisc_tree_reduce_backlog(sch, qlen, backlog); } static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); qdisc_reset(sch); qdisc_tree_reduce_backlog(sch, qlen, backlog); } static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; qh->tail = NULL; qh->qlen = 0; } static inline void __qdisc_enqueue_tail(struct sk_buff *skb, struct qdisc_skb_head *qh) { struct sk_buff *last = qh->tail; if (last) { skb->next = NULL; last->next = skb; qh->tail = skb; } else { qh->tail = skb; qh->head = skb; } qh->qlen++; } static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) { __qdisc_enqueue_tail(skb, &sch->q); qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; } static inline void __qdisc_enqueue_head(struct sk_buff *skb, struct qdisc_skb_head *qh) { skb->next = qh->head; if (!qh->head) qh->tail = skb; qh->head = skb; qh->qlen++; } static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { struct sk_buff *skb = qh->head; if (likely(skb != NULL)) { qh->head = skb->next; qh->qlen--; if (qh->head == NULL) qh->tail = NULL; skb->next = NULL; } return skb; } static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); } return skb; } /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) { skb->next = *to_free; *to_free = skb; } static inline void __qdisc_drop_all(struct sk_buff *skb, struct sk_buff **to_free) { if (skb->prev) skb->prev->next = *to_free; else skb->next = *to_free; *to_free = skb; } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, struct qdisc_skb_head *qh, struct sk_buff **to_free) { struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); __qdisc_drop(skb, to_free); return len; } return 0; } static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { const struct qdisc_skb_head *qh = &sch->q; return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { struct sk_buff *skb = skb_peek(&sch->gso_skb); /* we can reuse 
->gso_skb because peek isn't called for root qdiscs */ if (!skb) { skb = sch->dequeue(sch); if (skb) { __skb_queue_head(&sch->gso_skb, skb); /* it's still part of the queue */ qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } } return skb; } static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch, struct sk_buff *skb) { if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_backlog_dec(sch, skb); qdisc_bstats_cpu_update(sch, skb); qdisc_qstats_cpu_qlen_dec(sch); } else { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); sch->q.qlen--; } } static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch, unsigned int pkt_len) { if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_qlen_inc(sch); this_cpu_add(sch->cpu_qstats->backlog, pkt_len); } else { sch->qstats.backlog += pkt_len; sch->q.qlen++; } } /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { struct sk_buff *skb = skb_peek(&sch->gso_skb); if (skb) { skb = __skb_dequeue(&sch->gso_skb); if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_backlog_dec(sch, skb); qdisc_qstats_cpu_qlen_dec(sch); } else { qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } } else { skb = sch->dequeue(sch); } return skb; } static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ ASSERT_RTNL(); if (qh->qlen) { rtnl_kfree_skbs(qh->head, qh->tail); qh->head = NULL; qh->tail = NULL; qh->qlen = 0; } } static inline void qdisc_reset_queue(struct Qdisc *sch) { __qdisc_reset_queue(&sch->q); sch->qstats.backlog = 0; } static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, struct Qdisc **pold) { struct Qdisc *old; sch_tree_lock(sch); old = *pold; *pold = new; if (old != NULL) qdisc_purge_queue(old); sch_tree_unlock(sch); return old; } static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { rtnl_kfree_skbs(skb, skb); qdisc_qstats_drop(sch); } static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); qdisc_qstats_cpu_drop(sch); return NET_XMIT_DROP; } static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; } static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop_all(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; } /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how long it will take to send a packet given its size. 
*/ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) { int slot = pktlen + rtab->rate.cell_align + rtab->rate.overhead; if (slot < 0) slot = 0; slot >>= rtab->rate.cell_log; if (slot > 255) return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; return rtab->data[slot]; } struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { len += r->overhead; if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; return ((u64)len * r->mult) >> r->shift; } void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf, u64 rate64); static inline void psched_ratecfg_getrate(struct tc_ratespec *res, const struct psched_ratecfg *r) { memset(res, 0, sizeof(*res)); /* legacy struct tc_ratespec has a 32bit @rate field * Qdisc using 64bit rate should add new attributes * in order to maintain compatibility. */ res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. * The fast path only needs to access filter list and to update stats */ struct mini_Qdisc { struct tcf_proto *filter_list; struct tcf_block *block; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; }; static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, const struct sk_buff *skb) { bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); } static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) { this_cpu_inc(miniq->cpu_qstats->drops); } struct mini_Qdisc_pair { struct mini_Qdisc miniq1; struct mini_Qdisc miniq2; struct mini_Qdisc __rcu **p_miniq; }; void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) { return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb); } #endif
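/*
 * A stripped-down FIFO discipline in the spirit of sch_fifo.c, showing how
 * the enqueue/dequeue helpers above compose. This is a sketch only;
 * "example_fifo" is an invented name, and a real qdisc would also provide
 * .init/.change to set sch->limit and call register_qdisc().
 */
#include <linux/module.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	/* Accept the packet while the queue is below its configured limit;
	 * qdisc_enqueue_tail() updates qlen and backlog for us. */
	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
		return qdisc_enqueue_tail(skb, sch);

	/* Otherwise hand the skb to the caller for deferred freeing. */
	return qdisc_drop(skb, sch, to_free);
}

static struct Qdisc_ops example_fifo_qdisc_ops __read_mostly = {
	.id		= "example_fifo",
	.priv_size	= 0,
	.enqueue	= example_fifo_enqueue,
	.dequeue	= qdisc_dequeue_head,	/* pops, fixes accounting, bumps bstats */
	.peek		= qdisc_peek_head,
	.owner		= THIS_MODULE,
};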
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Queued spinlock * * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP * * Authors: Waiman Long <waiman.long@hpe.com> */ #ifndef __ASM_GENERIC_QSPINLOCK_H #define __ASM_GENERIC_QSPINLOCK_H #include <asm-generic/qspinlock_types.h> #include <linux/atomic.h> #ifndef queued_spin_is_locked /** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure * Return: 1 if it is locked, 0 otherwise */ static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL * isn't immediately observable. */ return atomic_read(&lock->val); } #endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? * @lock: queued spinlock structure * Return: 1 if it is unlocked, 0 otherwise * * N.B. Whenever there are tasks waiting for the lock, it is considered * locked wrt the lockref code to avoid lock stealing by the lockref * code and change things underneath the lock. This also allows some * optimizations to be applied without conflict with lockref. */ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { return !atomic_read(&lock.val); } /** * queued_spin_is_contended - check if the lock is contended * @lock : Pointer to queued spinlock structure * Return: 1 if lock contended, 0 otherwise */ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { return atomic_read(&lock->val) & ~_Q_LOCKED_MASK; } /** * queued_spin_trylock - try to acquire the queued spinlock * @lock : Pointer to queued spinlock structure * Return: 1 if lock acquired, 0 if failed */ static __always_inline int queued_spin_trylock(struct qspinlock *lock) { u32 val = atomic_read(&lock->val); if (unlikely(val)) return 0; return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)); } extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); #ifndef queued_spin_lock /** * queued_spin_lock - acquire a queued spinlock * @lock: Pointer to queued spinlock structure */ static __always_inline void queued_spin_lock(struct qspinlock *lock) { u32 val = 0; if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) return; queued_spin_lock_slowpath(lock, val); } #endif #ifndef queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock * @lock : Pointer to queued spinlock structure */ static __always_inline void queued_spin_unlock(struct qspinlock *lock) { /* * unlock() needs release semantics: */ smp_store_release(&lock->locked, 0); } #endif #ifndef virt_spin_lock static __always_inline bool virt_spin_lock(struct qspinlock *lock) { return false; } #endif /* * Remapping spinlock architecture specific functions to the corresponding * queued spinlock functions.
*/ #define arch_spin_is_locked(l) queued_spin_is_locked(l) #define arch_spin_is_contended(l) queued_spin_is_contended(l) #define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) #define arch_spin_lock(l) queued_spin_lock(l) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) #endif /* __ASM_GENERIC_QSPINLOCK_H */
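/*
 * The #define block above is what lets common code keep using
 * arch_spin_lock()/arch_spin_unlock() unchanged on architectures that adopt
 * the generic queued spinlock. A small illustration of the primitives
 * themselves (a sketch; example_try_do_work() is not a kernel function):
 */
#include <linux/types.h>
#include <asm-generic/qspinlock.h>

/* Try to enter a critical section without spinning; returns false if the
 * lock is currently held by someone else. */
static bool example_try_do_work(struct qspinlock *lock)
{
	if (!queued_spin_trylock(lock))
		return false;

	/* ... critical section ... */

	queued_spin_unlock(lock);	/* release-store on the locked byte */
	return true;
}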
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Dynamic loading of modules into the kernel.
* * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 * Rewritten again by Rusty Russell, 2002 */ #ifndef _LINUX_MODULE_H #define _LINUX_MODULE_H #include <linux/list.h> #include <linux/stat.h> #include <linux/compiler.h> #include <linux/cache.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/elf.h> #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> #include <linux/jump_label.h> #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> #include <linux/percpu.h> #include <asm/module.h> /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct modversion_info { unsigned long crc; char name[MODULE_NAME_LEN]; }; struct module; struct exception_table_entry; struct module_kobject { struct kobject kobj; struct module *mod; struct kobject *drivers_dir; struct module_param_attrs *mp; struct completion *kobj_completion; } __randomize_layout; struct module_attribute { struct attribute attr; ssize_t (*show)(struct module_attribute *, struct module_kobject *, char *); ssize_t (*store)(struct module_attribute *, struct module_kobject *, const char *, size_t count); void (*setup)(struct module *, const char *); int (*test)(struct module *); void (*free)(struct module *); }; struct module_version_attribute { struct module_attribute mattr; const char *module_name; const char *version; } __attribute__ ((__aligned__(sizeof(void *)))); extern ssize_t __modver_version_show(struct module_attribute *, struct module_kobject *, char *); extern struct module_attribute module_uevent; /* These are either module local, or the kernel's dummy ones. */ extern int init_module(void); extern void cleanup_module(void); #ifndef MODULE /** * module_init() - driver initialization entry point * @x: function to be run at kernel boot time or module insertion * * module_init() will either be called during do_initcalls() (if * builtin) or at module insertion time (if a module). There can only * be one per module. */ #define module_init(x) __initcall(x); /** * module_exit() - driver exit entry point * @x: function to be run when driver is removed * * module_exit() will wrap the driver clean-up code * with cleanup_module() when used with rmmod when * the driver is a module. If the driver is statically * compiled into the kernel, module_exit() has no effect. * There can only be one per module. */ #define module_exit(x) __exitcall(x); #else /* MODULE */ /* * In most cases loadable modules do not need custom * initcall levels. There are still some valid cases where * a driver may be needed early if built in, and does not * matter when built as a loadable module. Like bus * snooping debug drivers. 
*/ #define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define core_initcall_sync(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define postcore_initcall_sync(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) #define subsys_initcall(fn) module_init(fn) #define subsys_initcall_sync(fn) module_init(fn) #define fs_initcall(fn) module_init(fn) #define fs_initcall_sync(fn) module_init(fn) #define rootfs_initcall(fn) module_init(fn) #define device_initcall(fn) module_init(fn) #define device_initcall_sync(fn) module_init(fn) #define late_initcall(fn) module_init(fn) #define late_initcall_sync(fn) module_init(fn) #define console_initcall(fn) module_init(fn) /* Each module must use one module_init(). */ #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __copy(initfn) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn))); #endif /* This means "can be init if no module support, otherwise module load may call it." */ #ifdef CONFIG_MODULES #define __init_or_module #define __initdata_or_module #define __initconst_or_module #define __INIT_OR_MODULE .text #define __INITDATA_OR_MODULE .data #define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata #define __initconst_or_module __initconst #define __INIT_OR_MODULE __INIT #define __INITDATA_OR_MODULE __INITDATA #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) /* Soft module dependencies. See man modprobe.d for details. * Example: MODULE_SOFTDEP("pre: module-foo module-bar post: module-baz") */ #define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep) /* * MODULE_FILE is used for generating modules.builtin * So, make it no-op when this is being built as a module */ #ifdef MODULE #define MODULE_FILE #else #define MODULE_FILE MODULE_INFO(file, KBUILD_MODFILE); #endif /* * The following license idents are currently accepted as indicating free * software modules * * "GPL" [GNU Public License v2] * "GPL v2" [GNU Public License v2] * "GPL and additional rights" [GNU Public License v2 rights and more] * "Dual BSD/GPL" [GNU Public License v2 * or BSD license choice] * "Dual MIT/GPL" [GNU Public License v2 * or MIT license choice] * "Dual MPL/GPL" [GNU Public License v2 * or Mozilla license choice] * * The following other idents are available * * "Proprietary" [Non free products] * * Both "GPL v2" and "GPL" (the latter also in dual licensed strings) are * merely stating that the module is licensed under the GPL v2, but are not * telling whether "GPL v2 only" or "GPL v2 or later". The reason why there * are two variants is a historic and failed attempt to convey more * information in the MODULE_LICENSE string. For module loading the * "only/or later" distinction is completely irrelevant and does neither * replace the proper license identifiers in the corresponding source file * nor amends them in any way. 
The sole purpose is to make the * 'Proprietary' flagging work and to refuse to bind symbols which are * exported with EXPORT_SYMBOL_GPL when a non free module is loaded. * * In the same way "BSD" is not a clear license information. It merely * states, that the module is licensed under one of the compatible BSD * license variants. The detailed and correct license information is again * to be found in the corresponding source files. * * There are dual licensed components, but when running with Linux it is the * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL * is a GPL combined work. * * This exists for several reasons * 1. So modinfo can show license info for users wanting to vet their setup * is free * 2. So the community can ignore bug reports including proprietary modules * 3. So vendors can do likewise based on their own policies */ #define MODULE_LICENSE(_license) MODULE_FILE MODULE_INFO(license, _license) /* * Author(s), use "Name <email>" or just "Name", for multiple * authors use multiple MODULE_AUTHOR() statements/lines. */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ extern typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) #endif /* Version of form [<epoch>:]<version>[-<extra-version>]. * Or for CVS/RCS ID version, everything but the number is stripped. * <epoch>: A (small) unsigned integer which allows you to start versions * anew. If not mentioned, it's zero. eg. "2:1.0" is after * "1:2.0". * <version>: The <version> may contain only alphanumerics and the * character `.'. Ordered by numeric sort for numeric parts, * ascii sort for ascii parts (as per RPM or DEB algorithm). * <extraversion>: Like <version>, but inserted for local * customizations, eg "rh3" or "rusty1". * Using this automatically adds a checksum of the .c files and the * local headers in "srcversion". */ #if defined(MODULE) || !defined(CONFIG_SYSFS) #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ .name = "version", \ .mode = S_IRUGO, \ }, \ .show = __modver_version_show, \ }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ }; \ static const struct module_version_attribute \ __used __section("__modver") \ * __moduleparam_const __modver_attr = &___modver_attr #endif /* Optional firmware file (or files) needed by the module * format is simply firmware file name. Multiple firmware * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) #define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) struct notifier_block; #ifdef CONFIG_MODULES extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) /* modules using other modules: kdb wants to see this. 
*/ struct module_use { struct list_head source_list; struct list_head target_list; struct module *source, *target; }; enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ MODULE_STATE_GOING, /* Going away. */ MODULE_STATE_UNFORMED, /* Still setting it up. */ }; struct mod_tree_node { struct module *mod; struct latch_tree_node node; }; struct module_layout { /* The actual code + data. */ void *base; /* Total size. */ unsigned int size; /* The size of the executable code. */ unsigned int text_size; /* Size of RO section of the module (text+rodata) */ unsigned int ro_size; /* Size of RO after init section */ unsigned int ro_after_init_size; #ifdef CONFIG_MODULES_TREE_LOOKUP struct mod_tree_node mtn; #endif }; #ifdef CONFIG_MODULES_TREE_LOOKUP /* Only touch one cacheline for common rbtree-for-core-layout case. */ #define __module_layout_align ____cacheline_aligned #else #define __module_layout_align #endif struct mod_kallsyms { Elf_Sym *symtab; unsigned int num_symtab; char *strtab; char *typetab; }; #ifdef CONFIG_LIVEPATCH struct klp_modinfo { Elf_Ehdr hdr; Elf_Shdr *sechdrs; char *secstrings; unsigned int symndx; }; #endif struct module { enum module_state state; /* Member of list of modules */ struct list_head list; /* Unique handle for this module */ char name[MODULE_NAME_LEN]; /* Sysfs stuff. */ struct module_kobject mkobj; struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; struct kobject *holders_dir; /* Exported symbols */ const struct kernel_symbol *syms; const s32 *crcs; unsigned int num_syms; /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock; #endif struct kernel_param *kp; unsigned int num_kp; /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ const struct kernel_symbol *unused_syms; const s32 *unused_crcs; unsigned int num_unused_syms; /* GPL-only, unused exported symbols. */ unsigned int num_unused_gpl_syms; const struct kernel_symbol *unused_gpl_syms; const s32 *unused_gpl_crcs; #endif #ifdef CONFIG_MODULE_SIG /* Signature was verified. */ bool sig_ok; #endif bool async_probe_requested; /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const s32 *gpl_future_crcs; unsigned int num_gpl_future_syms; /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; /* Startup function. */ int (*init)(void); /* Core layout: rbtree is accessed frequently, so keep together. */ struct module_layout core_layout __module_layout_align; struct module_layout init_layout; /* Arch-specific module values */ struct mod_arch_specific arch; unsigned long taints; /* same bits as kernel:taint_flags */ #ifdef CONFIG_GENERIC_BUG /* Support for BUG */ unsigned num_bugs; struct list_head bug_list; struct bug_entry *bug_table; #endif #ifdef CONFIG_KALLSYMS /* Protected by RCU and/or module_mutex: use rcu_dereference() */ struct mod_kallsyms __rcu *kallsyms; struct mod_kallsyms core_kallsyms; /* Section attributes */ struct module_sect_attrs *sect_attrs; /* Notes attributes */ struct module_notes_attrs *notes_attrs; #endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; #ifdef CONFIG_SMP /* Per-cpu data. 
*/ void __percpu *percpu; unsigned int percpu_size; #endif void *noinstr_text_start; unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef CONFIG_TREE_SRCU unsigned int num_srcu_structs; struct srcu_struct **srcu_struct_ptrs; #endif #ifdef CONFIG_BPF_EVENTS unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING unsigned int num_trace_bprintk_fmt; const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; struct trace_eval_map **trace_evals; unsigned int num_trace_evals; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif #ifdef CONFIG_KPROBES void *kprobes_text_start; unsigned int kprobes_text_size; unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif #ifdef CONFIG_HAVE_STATIC_CALL_INLINE int num_static_call_sites; struct static_call_site *static_call_sites; #endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ bool klp_alive; /* Elf information */ struct klp_modinfo *klp_info; #endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; /* What modules do I depend on? */ struct list_head target_list; /* Destruction function. */ void (*exit)(void); atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; unsigned int num_ctors; #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION struct error_injection_entry *ei_funcs; unsigned int num_ei_funcs; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { return sym->st_value; } #endif extern struct mutex module_mutex; /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. But presently too much code (IDE & SCSI) require entry into the module during init.*/ static inline bool module_is_live(struct module *mod) { return mod->state != MODULE_STATE_GOING; } struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { return (unsigned long)mod->core_layout.base <= addr && addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { return (unsigned long)mod->init_layout.base <= addr && addr < (unsigned long)mod->init_layout.base + mod->init_layout.size; } static inline bool within_module(unsigned long addr, const struct module *mod) { return within_module_init(addr, mod) || within_module_core(addr, mod); } /* Search for module by name: must hold module_mutex. 
*/ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; enum mod_license { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, } license; bool unused; }; /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported); /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); extern void __noreturn __module_put_and_exit(struct module *mod, long code); #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(__stringify(x)) void symbol_put_addr(void *addr); /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ extern void __module_get(struct module *module); /* This is the Right Way to get a module: if it fails, it's being removed, * so pretend it's not there. */ extern bool try_module_get(struct module *module); extern void module_put(struct module *module); #else /*!CONFIG_MODULE_UNLOAD*/ static inline bool try_module_get(struct module *module) { return !module || module_is_live(module); } static inline void module_put(struct module *module) { } static inline void __module_get(struct module *module) { } #define symbol_put(x) do { } while (0) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ ({ \ struct module *__mod = (mod); \ __mod ? __mod->name : "kernel"; \ }) /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); /* For kallsyms to ask for address resolution. namebuf should be at * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); int register_module_notifier(struct notifier_block *nb); int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); static inline bool module_requested_async_probing(struct module *module) { return module && module->async_probe_requested; } #ifdef CONFIG_LIVEPATCH static inline bool is_livepatch_module(struct module *mod) { return mod->klp; } #else /* !CONFIG_LIVEPATCH */ static inline bool is_livepatch_module(struct module *mod) { return false; } #endif /* CONFIG_LIVEPATCH */ bool is_module_sig_enforced(void); void set_module_sig_enforced(void); #else /* !CONFIG_MODULES... 
*/ static inline struct module *__module_address(unsigned long addr) { return NULL; } static inline struct module *__module_text_address(unsigned long addr) { return NULL; } static inline bool is_module_address(unsigned long addr) { return false; } static inline bool is_module_percpu_address(unsigned long addr) { return false; } static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { return false; } static inline bool is_module_text_address(unsigned long addr) { return false; } static inline bool within_module_core(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module(unsigned long addr, const struct module *mod) { return false; } /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) static inline void __module_get(struct module *module) { } static inline bool try_module_get(struct module *module) { return true; } static inline void module_put(struct module *module) { } #define module_name(mod) "kernel" /* For kallsyms to ask for address resolution. NULL means not found. */ static inline const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { return -ERANGE; } static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { return -ERANGE; } static inline unsigned long module_kallsyms_lookup_name(const char *name) { return 0; } static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { return 0; } static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ return 0; } static inline int unregister_module_notifier(struct notifier_block *nb) { return 0; } #define module_put_and_exit(code) do_exit(code) static inline void print_modules(void) { } static inline bool module_requested_async_probing(struct module *module) { return false; } static inline bool is_module_sig_enforced(void) { return false; } static inline void set_module_sig_enforced(void) { } /* Dereference module function descriptor */ static inline void *dereference_module_function_descriptor(struct module *mod, void *ptr) { return ptr; } #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS extern struct kset *module_kset; extern struct kobj_type module_ktype; extern int module_sysfs_initialized; #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ #define __MODULE_STRING(x) __stringify(x) #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { } static inline void module_bug_cleanup(struct module *mod) {} 
#endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) { return true; } #endif #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { return module->sig_ok; } #else /* !CONFIG_MODULE_SIG */ static inline bool module_sig_ok(struct module *module) { return true; } #endif /* CONFIG_MODULE_SIG */ #endif /* _LINUX_MODULE_H */
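As a usage sketch of the module_init()/module_exit() entry points and the MODULE_* info macros declared above, a minimal module could look like the following (the hello_* names and the metadata strings are illustrative, not taken from this header):

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>

static int __init hello_init(void)
{
	pr_info("hello: loaded\n");
	return 0;	/* a negative return aborts the load */
}

static void __exit hello_exit(void)
{
	pr_info("hello: unloaded\n");
}

module_init(hello_init);
module_exit(hello_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Example Author <author@example.com>");
MODULE_DESCRIPTION("Minimal module_init()/module_exit() usage sketch");

Built as a module, the aliases above turn these into init_module()/cleanup_module(); built in, hello_init() runs from do_initcalls() and, as the comment above notes, module_exit() has no effect.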
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_64_H #define _ASM_X86_PGTABLE_64_H #include <linux/const.h> #include <asm/pgtable_64_types.h> #ifndef __ASSEMBLY__ /* * This file contains the functions and defines necessary to modify and use * the x86-64 page table tree. */ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> #include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) #if CONFIG_PGTABLE_LEVELS >= 5 #define p4d_ERROR(e) \ pr_err("%s:%d: bad p4d %p(%016lx)\n", \ __FILE__, __LINE__, &(e), p4d_val(e)) #endif #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) struct mm_struct; #define mm_p4d_folded mm_p4d_folded static inline bool mm_p4d_folded(struct mm_struct *mm) { return !pgtable_l5_enabled(); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); static inline void native_set_pte(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); } static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { native_set_pte(ptep, pte); } static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) { native_set_pmd(pmd, native_make_pmd(0)); } static inline pte_t native_ptep_get_and_clear(pte_t *xp) { #ifdef CONFIG_SMP return native_make_pte(xchg(&xp->pte, 0)); #else /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */ pte_t ret = *xp; native_pte_clear(NULL, 0, xp); return ret; #endif } static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { #ifdef CONFIG_SMP return
native_make_pmd(xchg(&xp->pmd, 0)); #else /* native_local_pmdp_get_and_clear, but duplicated because of cyclic dependency */ pmd_t ret = *xp; native_pmd_clear(xp); return ret; #endif } static inline void native_set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) { native_set_pud(pud, native_make_pud(0)); } static inline pud_t native_pudp_get_and_clear(pud_t *xp) { #ifdef CONFIG_SMP return native_make_pud(xchg(&xp->pud, 0)); #else /* native_local_pudp_get_and_clear, * but duplicated because of cyclic dependency */ pud_t ret = *xp; native_pud_clear(xp); return ret; #endif } static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) { native_set_pgd(pgd, native_make_pgd(0)); } /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ /* PGD - Level 4 access */ /* PUD - Level 3 access */ /* PMD - Level 2 access */ /* PTE - Level 1 access */ /* * Encode and de-code a swap entry * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. * * SD Bits 1-4 are not used in non-present format and available for * special use described below: * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * * F (2) in swp entry is used to record when a pagetable is * writeprotected by userfaultfd WP support. * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * * The offset is inverted by a binary not operation to make the high * physical bits set. */ #define SWP_TYPE_BITS 5 #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) /* Extract the high bits for type */ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) /* Shift up (to get rid of type), then down to get value */ #define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) /* * Shift the offset up "too far" by TYPE bits, then down again * The offset is inverted by a binary not operation to make the high * physical bits set. 
*/ #define __swp_entry(type, offset) ((swp_entry_t) { \ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) #define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 /* fs/proc/kcore.c */ #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME #define vmemmap ((struct page *)VMEMMAP_START) extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; } #include <asm/pgtable-invert.h> #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */
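The swap-entry layout described above can be exercised in isolation. The stand-alone program below is only a sketch: it hard-codes the values this header derives elsewhere (SWP_TYPE_BITS = 5 and SWP_OFFSET_FIRST_BIT = _PAGE_BIT_PROTNONE + 1 = 9) and assumes a 64-bit unsigned long, then checks that type and offset survive a round trip through the encoding with the offset bits stored inverted:

#include <assert.h>
#include <stdio.h>

#define SWP_TYPE_BITS		5
#define SWP_OFFSET_FIRST_BIT	9	/* assumed: _PAGE_BIT_PROTNONE + 1 */
#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)

/* Mirror of __swp_entry(): inverted offset in bits 9..58, type in the top 5 bits. */
static unsigned long swp_entry(unsigned long type, unsigned long offset)
{
	return (~offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
	       (type << (64 - SWP_TYPE_BITS));
}

/* Mirror of __swp_type(): the type lives in the highest SWP_TYPE_BITS bits. */
static unsigned long swp_type(unsigned long val)
{
	return val >> (64 - SWP_TYPE_BITS);
}

/* Mirror of __swp_offset(): undo the inversion and both shifts. */
static unsigned long swp_offset(unsigned long val)
{
	return ~val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT;
}

int main(void)
{
	unsigned long e = swp_entry(3, 0x12345);	/* type 3, offset 0x12345 */

	assert(swp_type(e) == 3);
	assert(swp_offset(e) == 0x12345);
	printf("entry=%#lx type=%lu offset=%#lx\n", e, swp_type(e), swp_offset(e));
	return 0;
}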
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DST_CACHE_H #define _NET_DST_CACHE_H #include <linux/jiffies.h> #include <net/dst.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_fib.h> #endif struct dst_cache { struct dst_cache_pcpu __percpu *cache; unsigned long reset_ts; }; /** * dst_cache_get - perform cache lookup * @dst_cache: the cache * * The caller should use dst_cache_get_ip4() if it needs to retrieve the * source address to be used when xmitting to the cached dst. * local BH must be disabled. */ struct dst_entry *dst_cache_get(struct dst_cache *dst_cache); /** * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address * @dst_cache: the cache * @saddr: return value for the retrieved source address * * local BH must be disabled. */ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr); /** * dst_cache_set_ip4 - store the ipv4 dst into the cache * @dst_cache: the cache * @dst: the entry to be cached * @saddr: the source address to be stored inside the cache * * local BH must be disabled. */ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, __be32 saddr); #if IS_ENABLED(CONFIG_IPV6) /** * dst_cache_set_ip6 - store the ipv6 dst into the cache * @dst_cache: the cache * @dst: the entry to be cached * @saddr: the source address to be stored inside the cache * * local BH must be disabled. */ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, const struct in6_addr *saddr); /** * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address * @dst_cache: the cache * @saddr: return value for the retrieved source address * * local BH must be disabled. */ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, struct in6_addr *saddr); #endif /** * dst_cache_reset - invalidate the cache contents * @dst_cache: the cache * * This does not free the cached dst to avoid races and contentions. * The dst will be freed on a later cache lookup. */ static inline void dst_cache_reset(struct dst_cache *dst_cache) { dst_cache->reset_ts = jiffies; } /** * dst_cache_reset_now - invalidate the cache contents immediately * @dst_cache: the cache * * The caller must be sure there are no concurrent users, as this frees * all dst_cache users immediately, rather than waiting for the next * per-cpu usage like dst_cache_reset does. Most callers should use the * higher speed lazily-freed dst_cache_reset function instead. */ void dst_cache_reset_now(struct dst_cache *dst_cache); /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache * @gfp: allocation flags */ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); /** * dst_cache_destroy - empty the cache and free the allocated storage * @dst_cache: the cache * * No synchronization is enforced: it must be called only when the cache * is unused. */ void dst_cache_destroy(struct dst_cache *dst_cache); #endif
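A typical consumer is a tunnel transmit path: resolve the route once, cache it, and serve subsequent packets from the cache. The sketch below assumes BH is already disabled (as it is in the normal xmit path); struct my_tunnel, my_tun_get_rt() and the minimal flow setup are hypothetical, not part of this API:

#include <linux/err.h>
#include <linux/string.h>
#include <net/dst_cache.h>
#include <net/route.h>

/* Hypothetical per-tunnel state; 'cache' is set up with dst_cache_init(). */
struct my_tunnel {
	struct dst_cache cache;
	__be32 daddr;
};

static struct rtable *my_tun_get_rt(struct net *net, struct my_tunnel *t,
				    struct flowi4 *fl4)
{
	struct rtable *rt;
	__be32 saddr;

	/* Fast path: reuse the cached route and its source address. */
	rt = dst_cache_get_ip4(&t->cache, &saddr);
	if (rt) {
		fl4->saddr = saddr;
		return rt;
	}

	/* Slow path: full lookup, then remember the result for next time. */
	memset(fl4, 0, sizeof(*fl4));
	fl4->daddr = t->daddr;
	rt = ip_route_output_key(net, fl4);
	if (IS_ERR(rt))
		return NULL;
	dst_cache_set_ip4(&t->cache, &rt->dst, fl4->saddr);
	return rt;
}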
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_COMPAT_H #define _ASM_X86_COMPAT_H /* * Architecture specific compatibility types */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/user32.h> #include <asm/unistd.h> #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u32 __compat_uid32_t; typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { compat_dev_t st_dev; u16 __pad1; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; u32 st_blksize; u32 st_blocks; u32 st_atime; u32 st_atime_nsec; u32 st_mtime; u32 st_mtime_nsec; u32 st_ctime; u32 st_ctime_nsec; u32 __unused4; u32 __unused5; }; struct compat_flock { short l_type; short l_whence; compat_off_t l_start; compat_off_t l_len; compat_pid_t l_pid; }; #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 /* * IA32 uses 4 byte alignment for 64 bit quantities, * so we need to pack this structure. */ struct compat_flock64 { short l_type; short l_whence; compat_loff_t l_start; compat_loff_t l_len; compat_pid_t l_pid; } __attribute__((packed)); struct compat_statfs { int f_type; int f_bsize; int f_blocks; int f_bfree; int f_bavail; int f_files; int f_ffree; compat_fsid_t f_fsid; int f_namelen; /* SunOS ignores this field.
*/ int f_frsize; int f_flags; int f_spare[4]; }; #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ #define _COMPAT_NSIG 64 #define _COMPAT_NSIG_BPW 32 typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; __compat_gid32_t gid; __compat_uid32_t cuid; __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; unsigned short __pad2; compat_ulong_t unused1; compat_ulong_t unused2; }; struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; compat_ulong_t sem_otime; compat_ulong_t sem_otime_high; compat_ulong_t sem_ctime; compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; }; struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; compat_ulong_t msg_stime; compat_ulong_t msg_stime_high; compat_ulong_t msg_rtime; compat_ulong_t msg_rtime_high; compat_ulong_t msg_ctime; compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; compat_pid_t msg_lspid; compat_pid_t msg_lrpid; compat_ulong_t __unused4; compat_ulong_t __unused5; }; struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; compat_ulong_t shm_atime; compat_ulong_t shm_atime_high; compat_ulong_t shm_dtime; compat_ulong_t shm_dtime_high; compat_ulong_t shm_ctime; compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; compat_ulong_t __unused4; compat_ulong_t __unused5; }; /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ typedef struct user_regs_struct compat_elf_gregset_t; /* Full regset -- prstatus on x32, otherwise on ia32 */ #define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296) #define SET_PR_FPVALID(S, V, R) \ do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif static inline void __user *arch_compat_alloc_user_space(long len) { compat_uptr_t sp; if (test_thread_flag(TIF_IA32)) { sp = task_pt_regs(current)->sp; } else { /* -128 for the x32 ABI redzone */ sp = task_pt_regs(current)->sp - 128; } return (void __user *)round_down(sp - len, 16); } static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) return true; #endif return false; } static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } #ifdef CONFIG_COMPAT static inline bool in_compat_syscall(void) { return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ #define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; #ifdef CONFIG_X86_X32_ABI int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); #define copy_siginfo_to_user32 copy_siginfo_to_user32 #endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */
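One common use of in_compat_syscall() is to select the 32-bit structure layout when the calling task is a compat task. The handler below is purely illustrative (my_getlk and the surrounding ioctl plumbing are made up); it widens a struct compat_flock into the native struct flock before acting on it:

#include <linux/compat.h>
#include <linux/fcntl.h>
#include <linux/uaccess.h>

static long my_getlk(void __user *argp)
{
	struct flock fl;

	if (in_compat_syscall()) {
		struct compat_flock cfl;

		if (copy_from_user(&cfl, argp, sizeof(cfl)))
			return -EFAULT;
		/* Widen the 32-bit fields into the native layout. */
		fl.l_type   = cfl.l_type;
		fl.l_whence = cfl.l_whence;
		fl.l_start  = cfl.l_start;
		fl.l_len    = cfl.l_len;
		fl.l_pid    = cfl.l_pid;
	} else if (copy_from_user(&fl, argp, sizeof(fl))) {
		return -EFAULT;
	}

	/* ... operate on 'fl' ... */
	return 0;
}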
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mmap #if !defined(_TRACE_MMAP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MMAP_H #include <linux/tracepoint.h> TRACE_EVENT(vm_unmapped_area, TP_PROTO(unsigned long addr, struct vm_unmapped_area_info *info), TP_ARGS(addr, info), TP_STRUCT__entry( __field(unsigned long, addr) __field(unsigned long, total_vm) __field(unsigned long, flags) __field(unsigned long, length) __field(unsigned long, low_limit) __field(unsigned long, high_limit) __field(unsigned long, align_mask) __field(unsigned long, align_offset) ), TP_fast_assign( __entry->addr = addr; __entry->total_vm = current->mm->total_vm; __entry->flags = info->flags; __entry->length = info->length; __entry->low_limit = info->low_limit; __entry->high_limit = info->high_limit; __entry->align_mask = info->align_mask; __entry->align_offset = info->align_offset; ), TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n", IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr, IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0, __entry->total_vm, __entry->flags, __entry->length, __entry->low_limit, __entry->high_limit, __entry->align_mask, __entry->align_offset) ); #endif /* This part must be outside protection */ #include <trace/define_trace.h>
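The TRACE_EVENT() above expands to a trace_vm_unmapped_area() inline that the mmap path calls once the search for a free mapping range has produced either an address or an error value. A hypothetical caller (my_get_unmapped_area() and my_search_for_gap() are made-up names standing in for the real search code) would look roughly like this:

#include <trace/events/mmap.h>

static unsigned long my_get_unmapped_area(struct vm_unmapped_area_info *info)
{
	unsigned long addr;

	addr = my_search_for_gap(info);		/* hypothetical helper doing the
						 * actual bottom-up/top-down search */

	/* Emits the event defined above; compiles away when tracing is off. */
	trace_vm_unmapped_area(addr, info);
	return addr;
}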
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H #include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec64 *ts); void pvclock_resume(void); void pvclock_touch_watchdogs(void); static __always_inline unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) { unsigned version = src->version & ~1; /* Make sure that the version is read before the data. */ virt_rmb(); return version; } static __always_inline bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, unsigned version) { /* Make sure that the version is re-read after the data. */ virt_rmb(); return unlikely(version != src->version); } /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) { u64 product; #ifdef __i386__ u32 tmp1, tmp2; #else ulong tmp; #endif if (shift < 0) delta >>= -shift; else delta <<= shift; #ifdef __i386__ __asm__ ( "mul %5 ; " "mov %4,%%eax ; " "mov %%edx,%4 ; " "mul %5 ; " "xor %5,%5 ; " "add %4,%%eax ; " "adc %5,%%edx ; " : "=A" (product), "=r" (tmp1), "=r" (tmp2) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); #elif defined(__x86_64__) __asm__ ( "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" : [lo]"=a"(product), [hi]"=d"(tmp) : "0"(delta), [mul_frac]"rm"((u64)mul_frac)); #else #error implement me! #endif return product; } static __always_inline u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc) { u64 delta = tsc - src->tsc_timestamp; u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); return src->system_time + offset; } struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) #ifdef CONFIG_PARAVIRT_CLOCK void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti); struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void); #else static inline struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) { return NULL; } #endif #endif /* _ASM_X86_PVCLOCK_H */
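pvclock_read_begin() and pvclock_read_retry() bracket a lockless read of the hypervisor-updated time record: if the version changed while the fields were being read, the whole read must be retried. A sketch of that loop (my_pvclock_read is an illustrative name; the real pvclock_clocksource_read() additionally inspects the flags word inside the loop):

#include <asm/pvclock.h>
#include <asm/msr.h>	/* rdtsc_ordered() */

static u64 my_pvclock_read(struct pvclock_vcpu_time_info *src)
{
	unsigned version;
	u64 ret;

	do {
		/* Snapshot the version, then read and scale the TSC delta. */
		version = pvclock_read_begin(src);
		ret = __pvclock_read_cycles(src, rdtsc_ordered());
		/* Retry if the hypervisor updated the record meanwhile. */
	} while (pvclock_read_retry(src, version));

	return ret;
}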
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_FUTEX_H #define _ASM_X86_FUTEX_H #ifdef __KERNEL__ #include <linux/futex.h> #include <linux/uaccess.h> #include <asm/asm.h> #include <asm/errno.h> #include <asm/processor.h> #include <asm/smap.h> #define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ do { \ int oldval = 0, ret; \ asm volatile("1:\t" insn "\n" \ "2:\n" \ "\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ _ASM_EXTABLE_UA(1b, 3b) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ : "i" (-EFAULT), "0" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ } while(0) #define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ do { \ int oldval = 0, ret, tem; \ asm volatile("1:\tmovl %2, %0\n" \ "2:\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t2b\n" \ "4:\n" \ "\t.section .fixup,\"ax\"\n" \ "5:\tmov\t%5, %1\n" \ "\tjmp\t4b\n" \ "\t.previous\n" \ _ASM_EXTABLE_UA(1b, 5b) \ _ASM_EXTABLE_UA(3b, 5b) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ } while(0) static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; switch (op) { case FUTEX_OP_SET: unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ADD: unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); break; case FUTEX_OP_OR: unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ANDN: unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); break; case FUTEX_OP_XOR: unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); break; default: user_access_end(); return -ENOSYS; } user_access_end(); return 0; Efault: user_access_end(); return -EFAULT; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; asm volatile("\n" "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" "2:\n" "\t.section .fixup, \"ax\"\n" "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE_UA(1b, 3b) : "+r" (ret), "=a" (oldval), "+m" (*uaddr) : "i" (-EFAULT), "r" (newval), "1" (oldval) : "memory" ); user_access_end(); *uval = oldval; return ret; } #endif #endif /* _ASM_X86_FUTEX_H */
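The unsafe_atomic_op1()/unsafe_atomic_op2() blocks above perform, with a single locked instruction or a cmpxchgl retry loop on the user word, the fetch-and-op semantics modelled below in plain, non-atomic C. This is only a reference model of the intended result (futex_op_model is an illustrative name), not how the operation is actually carried out on user memory:

#include <linux/futex.h>
#include <linux/errno.h>

/* Return the old value of *uaddr in *oval and store op(old, oparg). */
static int futex_op_model(u32 *uaddr, int op, u32 oparg, u32 *oval)
{
	u32 old = *uaddr;

	switch (op) {
	case FUTEX_OP_SET:	*uaddr = oparg;		break;
	case FUTEX_OP_ADD:	*uaddr = old + oparg;	break;
	case FUTEX_OP_OR:	*uaddr = old | oparg;	break;
	case FUTEX_OP_ANDN:	*uaddr = old & ~oparg;	break;
	case FUTEX_OP_XOR:	*uaddr = old ^ oparg;	break;
	default:
		return -ENOSYS;
	}

	*oval = old;
	return 0;
}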
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H #include <linux/rculist.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/jobctl.h> #include <linux/sched/task.h> #include <linux/cred.h> #include <linux/refcount.h> #include <linux/posix-timers.h> #include <linux/mm_types.h> #include <asm/ptrace.h> /* * Types defining task->signal and task->sighand and APIs using them: */ struct sighand_struct { spinlock_t siglock; refcount_t count; wait_queue_head_t signalfd_wqh; struct k_sigaction action[_NSIG]; }; /* * Per-process accounting stats: */ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; };
struct cpu_itimer { u64 expires; u64 incr; }; /* * This is the atomic variant of task_cputime, which can be used for * storing and updating task_cputime statistics without locking. */ struct task_cputime_atomic { atomic64_t utime; atomic64_t stime; atomic64_t sum_exec_runtime; }; #define INIT_CPUTIME_ATOMIC \ (struct task_cputime_atomic) { \ .utime = ATOMIC64_INIT(0), \ .stime = ATOMIC64_INIT(0), \ .sum_exec_runtime = ATOMIC64_INIT(0), \ } /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; }; struct multiprocess_signals { sigset_t signal; struct hlist_node node; }; /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always * implies a shared sighand_struct, so locking * sighand_struct is always a proper superset of * the locking of signal_struct. */ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; /* For collecting multiprocess signals during fork */ struct hlist_head multiprocess; /* thread group exit support */ int group_exit_code; /* overloaded: * - notify group_exit_task when ->count is equal to notify_count * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ int notify_count; struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes * to this process instead of 'init'. The service manager is * able to receive SIGCHLD signals and is able to investigate * the process until it calls wait(). All children of this * process will inherit a flag if they should look for a * child_subreaper process at exit. */ unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ int posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; ktime_t it_real_incr; /* * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these * values are defined to 0 and 1 respectively */ struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. * See thread_group_cputimer(), et al, for details. */ struct thread_group_cputimer cputimer; #endif /* Empty if CONFIG_POSIX_TIMERS=n */ struct posix_cputimers posix_cputimers; /* PID/PID hash table linkage. */ struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif struct pid *tty_old_pgrp; /* boolean value for session group leader */ int leader; struct tty_struct *tty; /* NULL if no tty */ #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup *autogroup; #endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. 
* Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; unsigned long maxrss, cmaxrss; struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the * group, not including a zombie group leader. (This only differs * from jiffies_to_ns(utime + stime) if sched_clock uses something * other than jiffies.) */ unsigned long long sum_sched_runtime; /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs * to get both rlim_cur and rlim_max atomically, and either one * alone is a single word that can safely be read normally. * getrlimit/setrlimit use task_lock(current->group_leader) to * protect this instead of the siglock, because they really * have no need to disable irqs. */ struct rlimit rlim[RLIM_NLIMITS]; #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif #ifdef CONFIG_AUDIT unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif /* * Thread is the potential origin of an oom condition; kill first on * oom */ bool oom_flag_origin; short oom_score_adj; /* OOM kill score adjustment */ short oom_score_adj_min; /* OOM kill score adjustment min value. * Only settable by CAP_SYS_RESOURCE. */ struct mm_struct *oom_mm; /* recorded mm when the thread group got * killed by the oom killer */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations * (notably ptrace). * Deprecated: do not use in new code. * Use exec_update_lock instead. */ struct rw_semaphore exec_update_lock; /* Held while task_struct is * being updated during exec, * and may have inconsistent * permissions. */ } __randomize_layout; /* * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ #define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent.
*/ #define SIGNAL_CLD_STOPPED 0x00000010 #define SIGNAL_CLD_CONTINUED 0x00000020 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ SIGNAL_STOP_CONTINUED) static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { return (sig->flags & SIGNAL_GROUP_EXIT) || (sig->group_exit_task != NULL); } extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; int ret; spin_lock_irq(&task->sighand->siglock); ret = dequeue_signal(task, &task->blocked, &__info); spin_unlock_irq(&task->sighand->siglock); return ret; } static inline void kernel_signal_stop(void) { spin_lock_irq(&current->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) set_special_state(TASK_STOPPED); spin_unlock_irq(&current->sighand->siglock); schedule(); } #ifdef __ARCH_SI_TRAPNO # define ___ARCH_SI_TRAPNO(_a1) , _a1 #else # define ___ARCH_SI_TRAPNO(_a1) #endif #ifdef __ia64__ # define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 #else # define ___ARCH_SI_IA64(_a1, _a2, _a3) #endif int force_sig_fault_to_task(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) , struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); int send_sig_fault(int sig, int code, void __user *addr ___ARCH_SI_TRAPNO(int trapno) ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) , struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); int force_sig_ptrace_errno_trap(int errno, void __user *addr); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern void force_sigsegv(int sig); extern int force_sig_info(struct kernel_siginfo *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct 
pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); return -ERESTARTNOINTR; } static inline int signal_pending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); } static inline int fatal_signal_pending(struct task_struct *p) { return signal_pending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; if (!signal_pending(p)) return 0; return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } /* * This should only be used in fault handlers to decide whether we * should stop the current fault routine to handle the signals * instead, especially with the case where we've got interrupted with * a VM_FAULT_RETRY. */ static inline bool fault_signal_pending(vm_fault_t fault_flags, struct pt_regs *regs) { return unlikely((fault_flags & VM_FAULT_RETRY) && (fatal_signal_pending(current) || (user_mode(regs) && signal_pending(current)))); } /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. */ extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); static inline void signal_wake_up(struct task_struct *t, bool resume) { signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); } static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) { signal_wake_up_state(t, resume ? __TASK_TRACED : 0); } void task_join_group_stop(struct task_struct *task); #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on * SMP architectures because they require atomic operations. */ /** * set_restore_sigmask() - make sure saved_sigmask processing gets done * * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code * will run before returning to user mode, to process the flag. For * all callers, TIF_SIGPENDING is already set or it's no harm to set * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the * arch code will notice on return to user mode, in case those bits * are scarce. We set TIF_SIGPENDING here to ensure that the arch * signal code always gets run when TIF_RESTORE_SIGMASK is set. */ static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return test_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_and_clear_restore_sigmask(void) { return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); } #else /* TIF_RESTORE_SIGMASK */ /* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. 
*/ static inline void set_restore_sigmask(void) { current->restore_sigmask = true; } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { task->restore_sigmask = false; } static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; } static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return task->restore_sigmask; } static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) return false; current->restore_sigmask = false; return true; } #endif static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) __set_current_blocked(&current->saved_sigmask); } extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) WARN_ON(!test_thread_flag(TIF_SIGPENDING)); else restore_saved_sigmask(); } static inline sigset_t *sigmask_to_save(void) { sigset_t *res = &current->blocked; if (unlikely(test_restore_sigmask())) res = &current->saved_sigmask; return res; } static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); } /* These can be the second arg to send_sig_info/send_group_sig_info. */ #define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0) #define SEND_SIG_PRIV ((struct kernel_siginfo *) 1) static inline int __on_sig_stack(unsigned long sp) { #ifdef CONFIG_STACK_GROWSUP return sp >= current->sas_ss_sp && sp - current->sas_ss_sp < current->sas_ss_size; #else return sp > current->sas_ss_sp && sp - current->sas_ss_sp <= current->sas_ss_size; #endif } /* * True if we are on the alternate signal stack. */ static inline int on_sig_stack(unsigned long sp) { /* * If the signal stack is SS_AUTODISARM then, by construction, we * can't be on the signal stack unless user code deliberately set * SS_AUTODISARM when we were already on it. * * This improves reliability: if user state gets corrupted such that * the stack pointer points very close to the end of the signal stack, * then this check will enable the signal to be handled anyway. */ if (current->sas_ss_flags & SS_AUTODISARM) return 0; return __on_sig_stack(sp); } static inline int sas_ss_flags(unsigned long sp) { if (!current->sas_ss_size) return SS_DISABLE; return on_sig_stack(sp) ? SS_ONSTACK : 0; } static inline void sas_ss_reset(struct task_struct *p) { p->sas_ss_sp = 0; p->sas_ss_size = 0; p->sas_ss_flags = SS_DISABLE; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) { if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) #ifdef CONFIG_STACK_GROWSUP return current->sas_ss_sp; #else return current->sas_ss_sp + current->sas_ss_size; #endif return sp; } extern void __cleanup_sighand(struct sighand_struct *); extern void flush_itimer_signals(void); #define tasklist_empty() \ list_empty(&init_task.tasks) #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) extern bool current_is_single_threaded(void); /* * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. 
*/ #define do_each_thread(g, t) \ for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) #define __for_each_thread(signal, t) \ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) /* Careful: this is a double loop, 'break' won't work as expected. */ #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) typedef int (*proc_visitor)(struct task_struct *p, void *data); void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { struct pid *pid; if (type == PIDTYPE_PID) pid = task_pid(task); else pid = task->signal->pids[type]; return pid; } static inline struct pid *task_tgid(struct task_struct *task) { return task->signal->pids[PIDTYPE_TGID]; } /* * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ static inline struct pid *task_pgrp(struct task_struct *task) { return task->signal->pids[PIDTYPE_PGID]; } static inline struct pid *task_session(struct task_struct *task) { return task->signal->pids[PIDTYPE_SID]; } static inline int get_nr_threads(struct task_struct *task) { return task->signal->nr_threads; } static inline bool thread_group_leader(struct task_struct *p) { return p->exit_signal >= 0; } static inline bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { return p1->signal == p2->signal; } static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry_rcu(p->thread_group.next, struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) { return list_empty(&p->thread_group); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) extern bool thread_group_exited(struct pid *pid); extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) { struct sighand_struct *ret; ret = __lock_task_sighand(task, flags); (void)__cond_lock(&task->sighand->siglock, ret); return ret; } static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) { spin_unlock_irqrestore(&task->sighand->siglock, *flags); } static inline unsigned long task_rlimit(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); } static inline unsigned long rlimit_max(unsigned int limit) { return task_rlimit_max(current, limit); } #endif /* _LINUX_SCHED_SIGNAL_H */
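The lock_task_sighand()/unlock_task_sighand() pair declared above is the usual way to pin another task's sighand_struct before looking at its shared signal state. A minimal illustrative sketch follows; it is not part of the header, the helper name demo_task_has_shared_sigkill() is invented for this example, and sigismember() is assumed from linux/signal.h.

/*
 * Illustrative sketch only: take the sighand lock of @task and check
 * whether a group-wide SIGKILL is queued on its shared_pending set.
 * demo_task_has_shared_sigkill() is a hypothetical caller, not kernel API.
 */
static bool demo_task_has_shared_sigkill(struct task_struct *task)
{
	struct sighand_struct *sighand;
	unsigned long flags;
	bool killed = false;

	sighand = lock_task_sighand(task, &flags);
	if (!sighand)
		return false;	/* task is already exiting, no sighand left */

	/* siglock is held: the shared pending set may be inspected safely */
	killed = sigismember(&task->signal->shared_pending.signal, SIGKILL);

	unlock_task_sighand(task, &flags);
	return killed;
}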
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/minmax.h> #include <linux/sched.h> #include <linux/thread_info.h> #include <asm/uaccess.h> #ifdef CONFIG_SET_FS /* * Force the uaccess routines to be wired up for actual userspace access, * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone * using force_uaccess_end below. */ static inline mm_segment_t force_uaccess_begin(void) { mm_segment_t fs = get_fs(); set_fs(USER_DS); return fs; } static inline void force_uaccess_end(mm_segment_t oldfs) { set_fs(oldfs); } #else /* CONFIG_SET_FS */ typedef struct { /* empty dummy */ } mm_segment_t; #ifndef TASK_SIZE_MAX #define TASK_SIZE_MAX TASK_SIZE #endif #define uaccess_kernel() (false) #define user_addr_max() (TASK_SIZE_MAX) static inline mm_segment_t force_uaccess_begin(void) { return (mm_segment_t) { }; } static inline void force_uaccess_end(mm_segment_t oldfs) { } #endif /* CONFIG_SET_FS */ /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and * __copy_{to,from}_user{,_inatomic}(). * * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and * return the amount left to copy. They should assume that access_ok() has * already been checked (and succeeded); they should *not* zero-pad anything. * No KASAN or object size checks either - those belong here. * * Both of these functions should attempt to copy size bytes starting at from * into the area starting at to. They must not fetch or store anything * outside of those areas. Return value must be between 0 (everything * copied successfully) and size (nothing copied). * * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting * at to must become equal to the bytes fetched from the corresponding area * starting at from.
All data past to + size - N must be left unmodified. * * If copying succeeds, the return value must be 0. If some data cannot be * fetched, it is permitted to copy less than had been fetched; the only * hard requirement is that not storing anything at all (i.e. returning size) * should happen only when nothing could be copied. In other words, you don't * have to squeeze as much as possible - it is allowed, but not necessary. * * For raw_copy_from_user(), 'to' always points to kernel memory and no faults * on store should happen. Interpretation of from is affected by set_fs(). * For raw_copy_to_user() it's the other way round. * * Both can be inlined - it's up to architectures whether they want to bother * with that. They should not be used directly; they are used to implement * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) * that are used instead. Out of those, __... ones are inlined. Plain * copy_{to,from}_user() might or might not be inlined. If you want them * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. * * NOTE: only copy_from_user() zero-pads the destination in case of short copy. * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything * at all; their callers absolutely must check the return value. * * Biarch ones should also provide raw_copy_in_user() - similar to the above, * but both source and destination are __user pointers (affected by set_fs() * as usual) and both source and destination can trigger faults. */ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } /** * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. * @to: Destination address, in user space. * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * The caller should also make sure the user space address is pinned * so that we don't page fault and sleep.
*/ static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } #ifdef INLINE_COPY_FROM_USER static inline __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } #else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); #endif #ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; } #else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); #endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (likely(check_copy_size(to, n, false))) n = _copy_from_user(to, from, n); return n; } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { if (likely(check_copy_size(from, n, true))) n = _copy_to_user(to, from, n); return n; } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); if (access_ok(to, n) && access_ok(from, n)) n = raw_copy_in_user(to, from, n); return n; } #endif #ifndef copy_mc_to_kernel /* * Without arch opt-in this generic copy_mc_to_kernel() will not handle * #MC (or arch equivalent) during source read. */ static inline unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; } #endif static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; } static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; } /* * These routines enable/disable the pagefault handler. If disabled, it will * not take any locks and go straight to the fixup table. * * User access methods will not sleep when called from a pagefault_disabled() * environment. */ static inline void pagefault_disable(void) { pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. */ barrier(); } static inline void pagefault_enable(void) { /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); pagefault_disabled_dec(); } /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ static inline bool pagefault_disabled(void) { return current->pagefault_disabled != 0; } /* * The pagefault handler is in general disabled by pagefault_disable() or * when in irq context (via in_atomic()). * * This function should only be used by the fault handlers. 
Other users should * stick to pagefault_disabled(). * Please NEVER use preempt_disable() to disable the fault handler. With * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline __must_check unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } #endif /* ARCH_HAS_NOCACHE_UACCESS */ extern __must_check int check_zeroed_user(const void __user *from, size_t size); /** * copy_struct_from_user: copy a struct from userspace * @dst: Destination address, in kernel space. This buffer must be @ksize * bytes long. * @ksize: Size of @dst struct. * @src: Source address, in userspace. * @usize: (Alleged) size of @src struct. * * Copies a struct from userspace to kernel space, in a way that guarantees * backwards-compatibility for struct syscall arguments (as long as future * struct extensions are made such that all new fields are *appended* to the * old struct, and zeroed-out new fields have the same meaning as the old * struct). * * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. * The recommended usage is something like the following: * * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) * { * int err; * struct foo karg = {}; * * if (usize > PAGE_SIZE) * return -E2BIG; * if (usize < FOO_SIZE_VER0) * return -EINVAL; * * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); * if (err) * return err; * * // ... * } * * There are three cases to consider: * * If @usize == @ksize, then it's copied verbatim. * * If @usize < @ksize, then the userspace has passed an old struct to a * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) * are to be zero-filled. * * If @usize > @ksize, then the userspace has passed a new struct to an * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) * are checked to ensure they are zeroed, otherwise -E2BIG is returned. * * Returns (in all cases, some data may have been copied): * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. * * -EFAULT: access to userspace failed. */ static __always_inline __must_check int copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t usize) { size_t size = min(ksize, usize); size_t rest = max(ksize, usize) - size; /* Deal with trailing bytes. */ if (usize < ksize) { memset(dst + size, 0, rest); } else if (usize > ksize) { int ret = check_zeroed_user(src + size, rest); if (ret <= 0) return ret ?: -E2BIG; } /* Copy the interoperable parts of the struct. 
*/ if (copy_from_user(dst, src, size)) return -EFAULT; return 0; } bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); long copy_from_user_nofault(void *dst, const void __user *src, size_t size); long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ #define get_kernel_nofault(val, ptr) ({ \ const typeof(val) *__gk_ptr = (ptr); \ copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ }) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif #ifndef user_write_access_begin #define user_write_access_begin user_access_begin #define user_write_access_end user_access_end #endif #ifndef user_read_access_begin #define user_read_access_begin user_access_begin #define user_read_access_end user_access_end #endif #ifdef CONFIG_HARDENED_USERCOPY void usercopy_warn(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); #endif #endif /* __LINUX_UACCESS_H__ */
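As a usage note for the copy_{to,from}_user() helpers defined above, a minimal sketch of the usual round trip is shown below. It is not part of the header; struct demo_args and demo_copy_roundtrip() are names invented for this example.

/*
 * Illustrative sketch only: copy a small argument block in from user
 * space, work on the kernel copy, and copy the result back out.
 * Both helpers return the number of bytes NOT copied, so any non-zero
 * result is reported as -EFAULT.
 */
struct demo_args {
	__u32 in;
	__u32 out;
};

static long demo_copy_roundtrip(void __user *uarg)
{
	struct demo_args args;

	if (copy_from_user(&args, uarg, sizeof(args)))
		return -EFAULT;

	args.out = args.in * 2;	/* operate on the kernel-side copy only */

	if (copy_to_user(uarg, &args, sizeof(args)))
		return -EFAULT;

	return 0;
}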
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Generic associative array implementation. * * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_ASSOC_ARRAY_H #define _LINUX_ASSOC_ARRAY_H #ifdef CONFIG_ASSOCIATIVE_ARRAY #include <linux/types.h> #define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */ /* * Generic associative array. */ struct assoc_array { struct assoc_array_ptr *root; /* The node at the root of the tree */ unsigned long nr_leaves_on_tree; }; /* * Operations on objects and index keys for use by array manipulation routines. */ struct assoc_array_ops { /* Method to get a chunk of an index key from caller-supplied data */ unsigned long (*get_key_chunk)(const void *index_key, int level); /* Method to get a piece of an object's index key */ unsigned long (*get_object_key_chunk)(const void *object, int level); /* Is this the object we're looking for? */ bool (*compare_object)(const void *object, const void *index_key); /* How different is an object from an index key, to a bit position in * their keys? (or -1 if they're the same) */ int (*diff_objects)(const void *object, const void *index_key); /* Method to free an object. */ void (*free_object)(void *object); }; /* * Access and manipulation functions. */ struct assoc_array_edit; static inline void assoc_array_init(struct assoc_array *array) { array->root = NULL; array->nr_leaves_on_tree = 0; } extern int assoc_array_iterate(const struct assoc_array *array, int (*iterator)(const void *object, void *iterator_data), void *iterator_data); extern void *assoc_array_find(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key); extern void assoc_array_destroy(struct assoc_array *array, const struct assoc_array_ops *ops); extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, void *object); extern void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object); extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key); extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, const struct assoc_array_ops *ops); extern void assoc_array_apply_edit(struct assoc_array_edit *edit); extern void assoc_array_cancel_edit(struct assoc_array_edit *edit); extern int assoc_array_gc(struct assoc_array *array, const struct assoc_array_ops *ops, bool (*iterator)(void *object, void *iterator_data), void *iterator_data); #endif /* CONFIG_ASSOCIATIVE_ARRAY */ #endif /* _LINUX_ASSOC_ARRAY_H */
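The associative array API above is edit-based: insert, delete and clear build an edit script that is later applied or cancelled. A minimal sketch of that two-phase pattern follows; it is not part of the header, demo_assoc_insert() is an invented name, and IS_ERR()/PTR_ERR() are assumed from linux/err.h.

/*
 * Illustrative sketch only: insert an object using the two-phase
 * edit API. The caller supplies its own assoc_array_ops with the
 * key-chunk, comparison and free callbacks.
 */
static int demo_assoc_insert(struct assoc_array *array,
			     const struct assoc_array_ops *ops,
			     const void *index_key, void *object)
{
	struct assoc_array_edit *edit;

	/* Phase 1: allocate and prepare the edit; may sleep and may fail. */
	edit = assoc_array_insert(array, ops, index_key, object);
	if (IS_ERR(edit))
		return PTR_ERR(edit);

	/* Phase 2: publish the edit; this step does not fail. */
	assoc_array_apply_edit(edit);
	return 0;
}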
/* SPDX-License-Identifier: GPL-2.0 */ /* * A hash table (hashtab) maintains associations between * key values and datum values. The type of the key values * and the type of the datum values is arbitrary. The * functions for hash computation and key comparison are * provided by the creator of the table. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ #ifndef _SS_HASHTAB_H_ #define _SS_HASHTAB_H_ #include <linux/types.h> #include <linux/errno.h> #include <linux/sched.h> #define HASHTAB_MAX_NODES U32_MAX struct hashtab_key_params { u32 (*hash)(const void *key); /* hash function */ int (*cmp)(const void *key1, const void *key2); /* key comparison function */ }; struct hashtab_node { void *key; void *datum; struct hashtab_node *next; }; struct hashtab { struct hashtab_node **htable; /* hash table */ u32 size; /* number of slots in hash table */ u32 nel; /* number of elements in hash table */ }; struct hashtab_info { u32 slots_used; u32 max_chain_len; }; /* * Initializes a new hash table with the specified characteristics. * * Returns -ENOMEM if insufficient space is available or 0 otherwise. */ int hashtab_init(struct hashtab *h, u32 nel_hint); int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, void *key, void *datum); /* * Inserts the specified (key, datum) pair into the specified hash table. * * Returns -ENOMEM on memory allocation error, * -EEXIST if there is already an entry with the same key, * -EINVAL for general errors or 0 otherwise. */ static inline int hashtab_insert(struct hashtab *h, void *key, void *datum, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *prev, *cur; cond_resched(); if (!h->size || h->nel == HASHTAB_MAX_NODES) return -EINVAL; hvalue = key_params.hash(key) & (h->size - 1); prev = NULL; cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return -EEXIST; if (cmp < 0) break; prev = cur; cur = cur->next; } return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue], key, datum); } /* * Searches for the entry with the specified key in the hash table. * * Returns NULL if no entry has the specified key or * the datum of the entry otherwise. */ static inline void *hashtab_search(struct hashtab *h, const void *key, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *cur; if (!h->size) return NULL; hvalue = key_params.hash(key) & (h->size - 1); cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return cur->datum; if (cmp < 0) break; cur = cur->next; } return NULL; } /* * Destroys the specified hash table. */ void hashtab_destroy(struct hashtab *h); /* * Applies the specified apply function to (key,datum,args) * for each entry in the specified hash table. * * The order in which the function is applied to the entries * is dependent upon the internal structure of the hash table. * * If apply returns a non-zero status, then hashtab_map will cease * iterating through the hash table and will propagate the error * return to its caller.
*/ int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args), void *args); int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, int (*copy)(struct hashtab_node *new, struct hashtab_node *orig, void *args), int (*destroy)(void *k, void *d, void *args), void *args); /* Fill info with some hash table statistics */ void hashtab_stat(struct hashtab *h, struct hashtab_info *info); #endif /* _SS_HASHTAB_H_ */
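Because hashtab_insert() and hashtab_search() take the hash and comparison callbacks by value in struct hashtab_key_params, a caller typically keeps one params object per key type. The sketch below is illustrative only and not part of the header; demo_hash(), demo_cmp() and demo_hashtab_use() are invented names, and strcmp() is assumed from linux/string.h.

/*
 * Illustrative sketch only: a NUL-terminated string key type for hashtab.
 */
static u32 demo_hash(const void *key)
{
	const char *s = key;
	u32 h = 0;

	while (*s)
		h = (h << 5) + h + *s++;	/* simple djb2-style hash */
	return h;
}

static int demo_cmp(const void *key1, const void *key2)
{
	return strcmp(key1, key2);
}

static int demo_hashtab_use(struct hashtab *h, char *name, void *datum)
{
	struct hashtab_key_params params = {
		.hash = demo_hash,
		.cmp = demo_cmp,
	};
	int rc;

	rc = hashtab_insert(h, name, datum, params);
	if (rc)
		return rc;	/* -EEXIST, -ENOMEM or -EINVAL */

	return hashtab_search(h, name, params) ? 0 : -ENOENT;
}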
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Access to user system call parameters and results * * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * See asm-generic/syscall.h for descriptions of what we must do here. */ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H #include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) #define ia32_sys_call_table sys_call_table #endif #if defined(CONFIG_IA32_EMULATION) extern const sys_call_ptr_t ia32_sys_call_table[]; #endif #ifdef CONFIG_X86_X32_ABI extern const sys_call_ptr_t x32_sys_call_table[]; #endif /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons * sign-extend the low 32 bits. */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->ax; #ifdef CONFIG_IA32_EMULATION /* * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. */ error = (long) (int) error; #endif return IS_ERR_VALUE(error) ?
error : 0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->ax; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { regs->ax = (long) error ?: val; } #ifdef CONFIG_X86_32 static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { memcpy(args, &regs->bx, 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, unsigned int i, unsigned int n, const unsigned long *args) { BUG_ON(i + n > 6); memcpy(&regs->bx + i, args, n * sizeof(args[0])); } static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { *args++ = regs->bx; *args++ = regs->cx; *args++ = regs->dx; *args++ = regs->si; *args++ = regs->di; *args = regs->bp; } else # endif { *args++ = regs->di; *args++ = regs->si; *args++ = regs->dx; *args++ = regs->r10; *args++ = regs->r8; *args = regs->r9; } } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { regs->bx = *args++; regs->cx = *args++; regs->dx = *args++; regs->si = *args++; regs->di = *args++; regs->bp = *args; } else # endif { regs->di = *args++; regs->si = *args++; regs->dx = *args++; regs->r10 = *args++; regs->r8 = *args++; regs->r9 = *args; } } static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ return (IS_ENABLED(CONFIG_IA32_EMULATION) && task->thread_info.status & TS_COMPAT) ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } void do_syscall_64(unsigned long nr, struct pt_regs *regs); void do_int80_syscall_32(struct pt_regs *regs); long do_fast_syscall_32(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALL_H */
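The accessors above are what ptrace, audit and seccomp-style callers use to read syscall state from a stopped task. A minimal sketch follows; it is not part of the header, demo_dump_syscall() is an invented name, and pr_info() is assumed from linux/printk.h.

/*
 * Illustrative sketch only: report the syscall number, audit arch and
 * first argument for a task stopped at syscall entry.
 */
static void demo_dump_syscall(struct task_struct *task, struct pt_regs *regs)
{
	unsigned long args[6];
	int nr = syscall_get_nr(task, regs);

	if (nr < 0)
		return;		/* not currently inside a system call */

	syscall_get_arguments(task, regs, args);
	pr_info("syscall %d arch 0x%x arg0 0x%lx\n",
		nr, syscall_get_arch(task), args[0]);
}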
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 
1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ext4 #if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EXT4_H #include <linux/writeback.h> #include
<linux/tracepoint.h> struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_extent; struct ext4_prealloc_space; struct ext4_inode_info; struct mpage_da_data; struct ext4_map_blocks; struct extent_status; struct ext4_fsmap; struct partial_cluster; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define show_mballoc_flags(flags) __print_flags(flags, "|", \ { EXT4_MB_HINT_MERGE, "HINT_MERGE" }, \ { EXT4_MB_HINT_RESERVED, "HINT_RESV" }, \ { EXT4_MB_HINT_METADATA, "HINT_MDATA" }, \ { EXT4_MB_HINT_FIRST, "HINT_FIRST" }, \ { EXT4_MB_HINT_BEST, "HINT_BEST" }, \ { EXT4_MB_HINT_DATA, "HINT_DATA" }, \ { EXT4_MB_HINT_NOPREALLOC, "HINT_NOPREALLOC" }, \ { EXT4_MB_HINT_GROUP_ALLOC, "HINT_GRP_ALLOC" }, \ { EXT4_MB_HINT_GOAL_ONLY, "HINT_GOAL_ONLY" }, \ { EXT4_MB_HINT_TRY_GOAL, "HINT_TRY_GOAL" }, \ { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ { EXT4_MB_USE_RESERVED, "USE_RESV" }, \ { EXT4_MB_STRICT_CHECK, "STRICT_CHECK" }) #define show_map_flags(flags) __print_flags(flags, "|", \ { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ { EXT4_GET_BLOCKS_UNWRIT_EXT, "UNWRIT" }, \ { EXT4_GET_BLOCKS_DELALLOC_RESERVE, "DELALLOC" }, \ { EXT4_GET_BLOCKS_PRE_IO, "PRE_IO" }, \ { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, "CONVERT_UNWRITTEN" }, \ { EXT4_GET_BLOCKS_ZERO, "ZERO" }, \ { EXT4_GET_BLOCKS_IO_SUBMIT, "IO_SUBMIT" }, \ { EXT4_EX_NOCACHE, "EX_NOCACHE" }) /* * __print_flags() requires that all enum values be wrapped in the * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace * ring buffer. 
*/ TRACE_DEFINE_ENUM(BH_New); TRACE_DEFINE_ENUM(BH_Mapped); TRACE_DEFINE_ENUM(BH_Unwritten); TRACE_DEFINE_ENUM(BH_Boundary); #define show_mflags(flags) __print_flags(flags, "", \ { EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \ { EXT4_MAP_BOUNDARY, "B" }) #define show_free_flags(flags) __print_flags(flags, "|", \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ { EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \ { EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \ { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \ { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) TRACE_DEFINE_ENUM(ES_WRITTEN_B); TRACE_DEFINE_ENUM(ES_UNWRITTEN_B); TRACE_DEFINE_ENUM(ES_DELAYED_B); TRACE_DEFINE_ENUM(ES_HOLE_B); TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_extent_status(status) __print_flags(status, "", \ { EXTENT_STATUS_WRITTEN, "W" }, \ { EXTENT_STATUS_UNWRITTEN, "U" }, \ { EXTENT_STATUS_DELAYED, "D" }, \ { EXTENT_STATUS_HOLE, "H" }, \ { EXTENT_STATUS_REFERENCED, "R" }) #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), TP_ARGS(inode, orig_ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, orig_ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u16, mode ) ), TP_fast_assign( __entry->orig_ino = orig_ino; __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->orig_ino, (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid) ); TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, TP_PROTO(struct inode *dir, int mode), TP_ARGS(dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
TRACE_EVENT(ext4_request_inode,
	TP_PROTO(struct inode *dir, int mode),

	TP_ARGS(dir, mode),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	dir			)
		__field(	__u16,	mode			)
	),

	TP_fast_assign(
		__entry->dev	= dir->i_sb->s_dev;
		__entry->dir	= dir->i_ino;
		__entry->mode	= mode;
	),

	TP_printk("dev %d,%d dir %lu mode 0%o",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->dir, __entry->mode)
);

TRACE_EVENT(ext4_allocate_inode,
	TP_PROTO(struct inode *inode, struct inode *dir, int mode),

	TP_ARGS(inode, dir, mode),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	ino_t,	dir			)
		__field(	__u16,	mode			)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->dir	= dir->i_ino;
		__entry->mode	= mode;
	),

	TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  (unsigned long) __entry->dir, __entry->mode)
);

TRACE_EVENT(ext4_evict_inode,
	TP_PROTO(struct inode *inode),

	TP_ARGS(inode),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	int,	nlink			)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->nlink	= inode->i_nlink;
	),

	TP_printk("dev %d,%d ino %lu nlink %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, __entry->nlink)
);

TRACE_EVENT(ext4_drop_inode,
	TP_PROTO(struct inode *inode, int drop),

	TP_ARGS(inode, drop),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	int,	drop			)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->drop	= drop;
	),

	TP_printk("dev %d,%d ino %lu drop %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, __entry->drop)
);

TRACE_EVENT(ext4_nfs_commit_metadata,
	TP_PROTO(struct inode *inode),

	TP_ARGS(inode),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
	),

	TP_printk("dev %d,%d ino %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino)
);

TRACE_EVENT(ext4_mark_inode_dirty,
	TP_PROTO(struct inode *inode, unsigned long IP),

	TP_ARGS(inode, IP),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(unsigned long,	ip			)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->ip	= IP;
	),

	TP_printk("dev %d,%d ino %lu caller %pS",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, (void *)__entry->ip)
);

TRACE_EVENT(ext4_begin_ordered_truncate,
	TP_PROTO(struct inode *inode, loff_t new_size),

	TP_ARGS(inode, new_size),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	loff_t,	new_size		)
	),

	TP_fast_assign(
		__entry->dev		= inode->i_sb->s_dev;
		__entry->ino		= inode->i_ino;
		__entry->new_size	= new_size;
	),

	TP_printk("dev %d,%d ino %lu new_size %lld",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, __entry->new_size)
);

DECLARE_EVENT_CLASS(ext4__write_begin,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int flags),

	TP_ARGS(inode, pos, len, flags),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	loff_t,	pos			)
		__field(	unsigned int, len		)
		__field(	unsigned int, flags		)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->pos	= pos;
		__entry->len	= len;
		__entry->flags	= flags;
	),

	TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->pos, __entry->len, __entry->flags)
);

DEFINE_EVENT(ext4__write_begin, ext4_write_begin,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int flags),

	TP_ARGS(inode, pos, len, flags)
);

DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int flags),

	TP_ARGS(inode, pos, len, flags)
);
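/*
 * Example: DECLARE_EVENT_CLASS() defines the record layout and output format
 * once, and each DEFINE_EVENT() stamps out a named tracepoint that reuses
 * it, so ext4_write_begin and ext4_da_write_begin above share the
 * ext4__write_begin class.  A sketch of a caller (illustrative signature,
 * not the real ext4 function):
 *
 *	static int ext4_write_begin(...)
 *	{
 *		trace_ext4_write_begin(inode, pos, len, flags);
 *		...
 *	}
 *
 * Adding another event with the same fields would only need a new
 * DEFINE_EVENT(ext4__write_begin, <new_name>, ...) rather than a full
 * TRACE_EVENT() definition.
 */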
DECLARE_EVENT_CLASS(ext4__write_end,
	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int copied),

	TP_ARGS(inode, pos, len, copied),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	loff_t,	pos			)
		__field(	unsigned int, len		)
		__field(	unsigned int, copied		)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->pos	= pos;
		__entry->len	= len;
		__entry->copied	= copied;
	),

	TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->pos, __entry->len, __entry->copied)
);

DEFINE_EVENT(ext4__write_end, ext4_write_end,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int copied),

	TP_ARGS(inode, pos, len, copied)
);

DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int copied),

	TP_ARGS(inode, pos, len, copied)
);

DEFINE_EVENT(ext4__write_end, ext4_da_write_end,

	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int copied),

	TP_ARGS(inode, pos, len, copied)
);

TRACE_EVENT(ext4_writepages,
	TP_PROTO(struct inode *inode, struct writeback_control *wbc),

	TP_ARGS(inode, wbc),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)