1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Forwarding Information Base. * * Authors: A.N.Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #ifndef _NET_IP_FIB_H #define _NET_IP_FIB_H #include <net/flow.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/refcount.h> struct fib_config { u8 fc_dst_len; u8 fc_tos; u8 fc_protocol; u8 fc_scope; u8 fc_type; u8 fc_gw_family; /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; union { __be32 fc_gw4; struct in6_addr fc_gw6; }; int fc_oif; u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; u32 fc_nh_id; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; int fc_mp_len; u32 fc_flow; u32 fc_nlflags; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; }; struct fib_info; struct rtable; struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; struct rcu_head rcu; }; struct fnhe_hash_bucket { struct fib_nh_exception __rcu *chain; }; #define FNHE_HASH_SHIFT 11 #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 struct fib_nh_common { struct net_device *nhc_dev; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; u8 nhc_gw_family; unsigned char nhc_flags; struct lwtunnel_state *nhc_lwtstate; union { __be32 ipv4; struct in6_addr ipv6; } nhc_gw; int nhc_weight; atomic_t nhc_upper_bound; /* v4 specific, but allows fib6_nh with v4 routes */ struct rtable __rcu * __percpu *nhc_pcpu_rth_output; struct rtable __rcu *nhc_rth_input; struct fnhe_hash_bucket __rcu *nhc_exceptions; }; struct fib_nh { struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif __be32 nh_saddr; int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate #define fib_nh_scope nh_common.nhc_scope #define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw4 nh_common.nhc_gw.ipv4 #define fib_nh_gw6 nh_common.nhc_gw.ipv6 #define fib_nh_weight nh_common.nhc_weight #define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* * This structure contains data shared by many of routes. */ struct nexthop; struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; struct list_head nh_list; struct net *fib_net; int fib_treeref; refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; unsigned char fib_scope; unsigned char fib_type; __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; struct dst_metrics *fib_metrics; #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] #define fib_window fib_metrics->metrics[RTAX_WINDOW-1] #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; bool fib_nh_is_v6; bool nh_updated; struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[]; }; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule; #endif struct fib_table; struct fib_result { __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; u32 tclassid; struct fib_nh_common *nhc; struct fib_info *fi; struct fib_table *table; struct hlist_head *fa_head; }; struct fib_result_nl { __be32 fl_addr; /* To be looked up*/ u32 fl_mark; unsigned char fl_tos; unsigned char fl_scope; unsigned char tb_id_in; unsigned char tb_id; /* Results */ unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; int err; }; #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 2 #endif __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) #define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) #define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_rt_info { struct fib_info *fi; u32 tb_id; __be32 dst; int dst_len; u8 tos; u8 type; u8 offload:1, trap:1, unused:6; }; struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; u32 tb_id; }; struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; }; int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; unsigned long __data[]; }; struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; bool dump_routes; bool dump_exceptions; unsigned char protocol; unsigned char rt_type; unsigned int flags; struct net_device *dev; }; int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES #define TABLE_LOCAL_INDEX (RT_TABLE_LOCAL & (FIB_TABLE_HASHSZ - 1)) #define TABLE_MAIN_INDEX (RT_TABLE_MAIN & (FIB_TABLE_HASHSZ - 1)) static inline struct fib_table *fib_get_table(struct net *net, u32 id) { struct hlist_node *tb_hlist; struct hlist_head *ptr; ptr = id == RT_TABLE_LOCAL ? &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] : &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]; tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr)); return hlist_entry(tb_hlist, struct fib_table, tb_hlist); } static inline struct fib_table *fib_new_table(struct net *net, u32 id) { return fib_get_table(net, id); } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); if (tb) err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return false; } static inline bool fib4_rule_default(const struct fib_rule *rule) { return true; } static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib4_rules_seq_read(struct net *net) { return 0; } static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { return false; } #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) return __fib_lookup(net, flp, res, flags); rcu_read_lock(); res->tclassid = 0; tb = rcu_dereference_rtnl(net->ipv4.fib_main); if (tb) err = fib_table_lookup(tb, flp, res, flags); if (!err) goto out; tb = rcu_dereference_rtnl(net->ipv4.fib_default); if (tb) err = fib_table_lookup(tb, flp, res, flags); out: if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return net->ipv4.fib_has_custom_rules; } bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv4.fib_rules_require_fldissect) return false; skb_flow_dissect_flow_keys(skb, flkeys, flag); fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; fl4->flowi4_proto = flkeys->basic.ip_proto; return true; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) { return 0; } #endif int fib_unmerge(struct net *net); static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc, const struct net_device *dev) { if (nhc->nhc_dev == dev || l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) return true; return false; } /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); int fib_nh_init(struct net *net, struct fib_nh *fib_nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack); void fib_nh_release(struct net *net, struct fib_nh *fib_nh); int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, struct nlattr *fc_encap, u16 fc_encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib_nh_common_release(struct fib_nh_common *nhc); /* Exported by fib_trie.c */ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri); void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, const struct flowi4 *flp); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); *itag = nh->nh_tclassid << 16; } else { *itag = 0; } #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) *itag = (rtag<<16); *itag |= (rtag>>16); #endif #endif } void fib_flush(struct net *net); void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } #ifdef CONFIG_PROC_FS int __net_init fib_proc_init(struct net *net); void __net_exit fib_proc_exit(struct net *net); #else static inline int fib_proc_init(struct net *net) { return 0; } static inline void fib_proc_exit(struct net *net) { } #endif u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IP, TCP, UDP and so on * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ #ifndef _CHECKSUM_H #define _CHECKSUM_H #include <linux/errno.h> #include <asm/types.h> #include <asm/byteorder.h> #include <linux/uaccess.h> #include <asm/checksum.h> #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { if (copy_from_user(dst, src, len)) return 0; return csum_partial(dst, len, ~0U); } #endif #ifndef HAVE_CSUM_COPY_USER static __inline__ __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); if (copy_to_user(dst, src, len) == 0) return sum; return 0; } #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); return csum_partial(dst, len, 0); } #endif #ifndef HAVE_ARCH_CSUM_ADD static inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; return (__force __wsum)(res + (res < (__force u32)addend)); } #endif static inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } static inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; res += (__force u16)addend; return (__force __sum16)(res + (res < (__force u16)addend)); } static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } static inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { u32 sum = (__force u32)csum2; /* rotate sum to align it with a 16b boundary */ if (offset & 1) sum = ror32(sum, 8); return csum_add(csum, (__force __wsum)sum); } static inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } static inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } static inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) * 3. Discussion states : * HC' = ~(~HC + ~m + m') * m : old value of a 16bit field * m' : new value of a 16bit field */ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } static inline __wsum remcsum_adjust(void *ptr, __wsum csum, int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; /* Subtract out checksum up to start */ csum = csum_sub(csum, csum_partial(ptr, start, 0)); /* Set derived checksum in packet */ delta = csum_sub((__force __wsum)csum_fold(csum), (__force __wsum)*psum); *psum = csum_fold(csum); return delta; } static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _DELAYED_CALL_H #define _DELAYED_CALL_H /* * Poor man's closures; I wish we could've done them sanely polymorphic, * but... */ struct delayed_call { void (*fn)(void *); void *arg; }; #define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL} /* I really wish we had closures with sane typechecking... */ static inline void set_delayed_call(struct delayed_call *call, void (*fn)(void *), void *arg) { call->fn = fn; call->arg = arg; } static inline void do_delayed_call(struct delayed_call *call) { if (call->fn) call->fn(call->arg); } static inline void clear_delayed_call(struct delayed_call *call) { call->fn = NULL; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NLS_H #define _LINUX_NLS_H #include <linux/init.h> /* Unicode has changed over the years. Unicode code points no longer * fit into 16 bits; as of Unicode 5 valid code points range from 0 * to 0x10ffff (17 planes, where each plane holds 65536 code points). * * The original decision to represent Unicode characters as 16-bit * wchar_t values is now outdated. But plane 0 still includes the * most commonly used characters, so we will retain it. The newer * 32-bit unicode_t type can be used when it is necessary to * represent the full Unicode character set. */ /* Plane-0 Unicode character */ typedef u16 wchar_t; #define MAX_WCHAR_T 0xffff /* Arbitrary Unicode character */ typedef u32 unicode_t; struct nls_table { const char *charset; const char *alias; int (*uni2char) (wchar_t uni, unsigned char *out, int boundlen); int (*char2uni) (const unsigned char *rawstring, int boundlen, wchar_t *uni); const unsigned char *charset2lower; const unsigned char *charset2upper; struct module *owner; struct nls_table *next; }; /* this value hold the maximum octet of charset */ #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ /* Byte order for UTF-16 strings */ enum utf16_endian { UTF16_HOST_ENDIAN, UTF16_LITTLE_ENDIAN, UTF16_BIG_ENDIAN }; /* nls_base.c */ extern int __register_nls(struct nls_table *, struct module *); extern int unregister_nls(struct nls_table *); extern struct nls_table *load_nls(char *); extern void unload_nls(struct nls_table *); extern struct nls_table *load_nls_default(void); #define register_nls(nls) __register_nls((nls), THIS_MODULE) extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); extern int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, wchar_t *pwcs, int maxlen); extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u8 *s, int maxlen); static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2lower[c]; return nc ? nc : c; } static inline unsigned char nls_toupper(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2upper[c]; return nc ? nc : c; } static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (nls_tolower(t, *s1++) != nls_tolower(t, *s2++)) return 1; } return 0; } /* * nls_nullsize - return length of null character for codepage * @codepage - codepage for which to return length of NULL terminator * * Since we can't guarantee that the null terminator will be a particular * length, we have to check against the codepage. If there's a problem * determining it, assume a single-byte NULL terminator. */ static inline int nls_nullsize(const struct nls_table *codepage) { int charlen; char tmp[NLS_MAX_CHARSET_SIZE]; charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); return charlen > 0 ? charlen : 1; } #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_STRING_HELPERS_H_ #define _LINUX_STRING_HELPERS_H_ #include <linux/ctype.h> #include <linux/types.h> struct file; struct task_struct; /* Descriptions of the types of units to * print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ }; void string_get_size(u64 size, u64 blk_size, enum string_size_units units, char *buf, int len); #define UNESCAPE_SPACE 0x01 #define UNESCAPE_OCTAL 0x02 #define UNESCAPE_HEX 0x04 #define UNESCAPE_SPECIAL 0x08 #define UNESCAPE_ANY \ (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) int string_unescape(char *src, char *dst, size_t size, unsigned int flags); static inline int string_unescape_inplace(char *buf, unsigned int flags) { return string_unescape(buf, buf, 0, flags); } static inline int string_unescape_any(char *src, char *dst, size_t size) { return string_unescape(src, dst, size, UNESCAPE_ANY); } static inline int string_unescape_any_inplace(char *buf) { return string_unescape_any(buf, buf, 0); } #define ESCAPE_SPACE 0x01 #define ESCAPE_SPECIAL 0x02 #define ESCAPE_NULL 0x04 #define ESCAPE_OCTAL 0x08 #define ESCAPE_ANY \ (ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL) #define ESCAPE_NP 0x10 #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) #define ESCAPE_HEX 0x20 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only); int string_escape_mem_ascii(const char *src, size_t isz, char *dst, size_t osz); static inline int string_escape_mem_any_np(const char *src, size_t isz, char *dst, size_t osz, const char *only) { return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, only); } static inline int string_escape_str(const char *src, char *dst, size_t sz, unsigned int flags, const char *only) { return string_escape_mem(src, strlen(src), dst, sz, flags, only); } static inline int string_escape_str_any_np(const char *src, char *dst, size_t sz, const char *only) { return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } static inline void string_upper(char *dst, const char *src) { do { *dst++ = toupper(*src); } while (*src++); } static inline void string_lower(char *dst, const char *src) { do { *dst++ = tolower(*src); } while (*src++); } char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); void kfree_strarray(char **array, size_t n); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ext4 #if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EXT4_H #include <linux/writeback.h> #include <linux/tracepoint.h> struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_extent; struct ext4_prealloc_space; struct ext4_inode_info; struct mpage_da_data; struct ext4_map_blocks; struct extent_status; struct ext4_fsmap; struct partial_cluster; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define show_mballoc_flags(flags) __print_flags(flags, "|", \ { EXT4_MB_HINT_MERGE, "HINT_MERGE" }, \ { EXT4_MB_HINT_RESERVED, "HINT_RESV" }, \ { EXT4_MB_HINT_METADATA, "HINT_MDATA" }, \ { EXT4_MB_HINT_FIRST, "HINT_FIRST" }, \ { EXT4_MB_HINT_BEST, "HINT_BEST" }, \ { EXT4_MB_HINT_DATA, "HINT_DATA" }, \ { EXT4_MB_HINT_NOPREALLOC, "HINT_NOPREALLOC" }, \ { EXT4_MB_HINT_GROUP_ALLOC, "HINT_GRP_ALLOC" }, \ { EXT4_MB_HINT_GOAL_ONLY, "HINT_GOAL_ONLY" }, \ { EXT4_MB_HINT_TRY_GOAL, "HINT_TRY_GOAL" }, \ { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ { EXT4_MB_USE_RESERVED, "USE_RESV" }, \ { EXT4_MB_STRICT_CHECK, "STRICT_CHECK" }) #define show_map_flags(flags) __print_flags(flags, "|", \ { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ { EXT4_GET_BLOCKS_UNWRIT_EXT, "UNWRIT" }, \ { EXT4_GET_BLOCKS_DELALLOC_RESERVE, "DELALLOC" }, \ { EXT4_GET_BLOCKS_PRE_IO, "PRE_IO" }, \ { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, "CONVERT_UNWRITTEN" }, \ { EXT4_GET_BLOCKS_ZERO, "ZERO" }, \ { EXT4_GET_BLOCKS_IO_SUBMIT, "IO_SUBMIT" }, \ { EXT4_EX_NOCACHE, "EX_NOCACHE" }) /* * __print_flags() requires that all enum values be wrapped in the * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace * ring buffer. */ TRACE_DEFINE_ENUM(BH_New); TRACE_DEFINE_ENUM(BH_Mapped); TRACE_DEFINE_ENUM(BH_Unwritten); TRACE_DEFINE_ENUM(BH_Boundary); #define show_mflags(flags) __print_flags(flags, "", \ { EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \ { EXT4_MAP_BOUNDARY, "B" }) #define show_free_flags(flags) __print_flags(flags, "|", \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ { EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \ { EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \ { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \ { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) TRACE_DEFINE_ENUM(ES_WRITTEN_B); TRACE_DEFINE_ENUM(ES_UNWRITTEN_B); TRACE_DEFINE_ENUM(ES_DELAYED_B); TRACE_DEFINE_ENUM(ES_HOLE_B); TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_extent_status(status) __print_flags(status, "", \ { EXTENT_STATUS_WRITTEN, "W" }, \ { EXTENT_STATUS_UNWRITTEN, "U" }, \ { EXTENT_STATUS_DELAYED, "D" }, \ { EXTENT_STATUS_HOLE, "H" }, \ { EXTENT_STATUS_REFERENCED, "R" }) #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), TP_ARGS(inode, orig_ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, orig_ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u16, mode ) ), TP_fast_assign( __entry->orig_ino = orig_ino; __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->orig_ino, (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid) ); TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, TP_PROTO(struct inode *dir, int mode), TP_ARGS(dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, TP_PROTO(struct inode *inode, struct inode *dir, int mode), TP_ARGS(inode, dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_evict_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, nlink ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nlink = inode->i_nlink; ), TP_printk("dev %d,%d ino %lu nlink %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nlink) ); TRACE_EVENT(ext4_drop_inode, TP_PROTO(struct inode *inode, int drop), TP_ARGS(inode, drop), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, drop ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->drop = drop; ), TP_printk("dev %d,%d ino %lu drop %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->drop) ); TRACE_EVENT(ext4_nfs_commit_metadata, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d ino %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), TP_ARGS(inode, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, ip ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ip = IP; ), TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); TRACE_EVENT(ext4_begin_ordered_truncate, TP_PROTO(struct inode *inode, loff_t new_size), TP_ARGS(inode, new_size), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, new_size ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->new_size = new_size; ), TP_printk("dev %d,%d ino %lu new_size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->flags = flags; ), TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) ); DEFINE_EVENT(ext4__write_begin, ext4_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags) ); DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags) ); DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) __field( unsigned int, copied ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->copied = copied; ), TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) ); DEFINE_EVENT(ext4__write_end, ext4_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( long, nr_to_write ) __field( long, pages_skipped ) __field( loff_t, range_start ) __field( loff_t, range_end ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) __field( char, for_kupdate ) __field( char, range_cyclic ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; __entry->range_end = wbc->range_end; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->range_cyclic = wbc->range_cyclic; ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " "range_start %lld range_end %lld sync_mode %d " "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, (unsigned long) __entry->writeback_index) ); TRACE_EVENT(ext4_da_write_pages, TP_PROTO(struct inode *inode, pgoff_t first_page, struct writeback_control *wbc), TP_ARGS(inode, first_page, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, first_page ) __field( long, nr_to_write ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->first_page = first_page; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " "sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->first_page, __entry->nr_to_write, __entry->sync_mode) ); TRACE_EVENT(ext4_da_write_pages_extent, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map), TP_ARGS(inode, map), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, lblk ) __field( __u32, len ) __field( __u32, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->flags = map->m_flags; ), TP_printk("dev %d,%d ino %lu lblk %llu len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_mflags(__entry->flags)) ); TRACE_EVENT(ext4_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), TP_ARGS(inode, wbc, ret, pages_written), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) __field( int, pages_written ) __field( long, pages_skipped ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; __entry->pages_written = pages_written; __entry->pages_skipped = wbc->pages_skipped; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld " "sync_mode %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->sync_mode, (unsigned long) __entry->writeback_index) ); DECLARE_EVENT_CLASS(ext4__page_op, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) ), TP_fast_assign( __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; ), TP_printk("dev %d,%d ino %lu page_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index) ); DEFINE_EVENT(ext4__page_op, ext4_writepage, TP_PROTO(struct page *page), TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_readpage, TP_PROTO(struct page *page), TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_releasepage, TP_PROTO(struct page *page), TP_ARGS(page) ); DECLARE_EVENT_CLASS(ext4_invalidatepage_op, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) __field( unsigned int, offset ) __field( unsigned int, length ) ), TP_fast_assign( __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; __entry->offset = offset; __entry->length = length; ), TP_printk("dev %d,%d ino %lu page_index %lu offset %u length %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index, __entry->offset, __entry->length) ); DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length) ); DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length) ); TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), TP_ARGS(sb, blk, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, blk ) __field( __u64, count ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->blk = blk; __entry->count = count; ), TP_printk("dev %d,%d blk %llu count %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blk, __entry->count) ); DECLARE_EVENT_CLASS(ext4__mb_new_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u64, pa_lstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = ac->ac_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->pa_pstart = pa->pa_pstart; __entry->pa_lstart = pa->pa_lstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); TRACE_EVENT(ext4_mb_release_inode_pa, TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( __u32, count ) ), TP_fast_assign( __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), TP_printk("dev %d,%d ino %lu block %llu count %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d pstart %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->pa_pstart, __entry->pa_len) ); TRACE_EVENT(ext4_discard_preallocations, TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed), TP_ARGS(inode, len, needed), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) __field( unsigned int, needed ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->len = len; __entry->needed = needed; ), TP_printk("dev %d,%d ino %lu len: %u needed %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->len, __entry->needed) ); TRACE_EVENT(ext4_mb_discard_preallocations, TP_PROTO(struct super_block *sb, int needed), TP_ARGS(sb, needed), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, needed ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->needed = needed; ), TP_printk("dev %d,%d needed %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->needed) ); TRACE_EVENT(ext4_request_blocks, TP_PROTO(struct ext4_allocation_request *ar), TP_ARGS(ar), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u lblk %u goal %llu " "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block), TP_ARGS(ar, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->block = block; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u block %llu lblk %u " "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->block, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, TP_PROTO(struct inode *inode, __u64 block, unsigned long count, int flags), TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; __entry->flags = flags; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, show_free_flags(__entry->flags)) ); TRACE_EVENT(ext4_sync_file_enter, TP_PROTO(struct file *file, int datasync), TP_ARGS(file, datasync), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) ), TP_fast_assign( struct dentry *dentry = file->f_path.dentry; __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; __entry->parent = d_inode(dentry->d_parent)->i_ino; ), TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) ); TRACE_EVENT(ext4_sync_file_exit, TP_PROTO(struct inode *inode, int ret), TP_ARGS(inode, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); TRACE_EVENT(ext4_sync_fs, TP_PROTO(struct super_block *sb, int wait), TP_ARGS(sb, wait), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, wait ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->wait = wait; ), TP_printk("dev %d,%d wait %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->wait) ); TRACE_EVENT(ext4_alloc_da_blocks, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; ), TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, goal_logical ) __field( int, goal_start ) __field( __u32, goal_group ) __field( int, goal_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) __field( __u16, found ) __field( __u16, groups ) __field( __u16, buddy ) __field( __u16, flags ) __field( __u16, tail ) __field( __u8, cr ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->goal_logical = ac->ac_g_ex.fe_logical; __entry->goal_start = ac->ac_g_ex.fe_start; __entry->goal_group = ac->ac_g_ex.fe_group; __entry->goal_len = ac->ac_g_ex.fe_len; __entry->result_logical = ac->ac_f_ex.fe_logical; __entry->result_start = ac->ac_f_ex.fe_start; __entry->result_group = ac->ac_f_ex.fe_group; __entry->result_len = ac->ac_f_ex.fe_len; __entry->found = ac->ac_found; __entry->flags = ac->ac_flags; __entry->groups = ac->ac_groups_scanned; __entry->buddy = ac->ac_buddy; __entry->tail = ac->ac_tail; __entry->cr = ac->ac_criteria; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " "result %u/%d/%u@%u blks %u grps %u cr %u flags %s " "tail %u broken %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->goal_group, __entry->goal_start, __entry->goal_len, __entry->goal_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical, __entry->found, __entry->groups, __entry->cr, show_mballoc_flags(__entry->flags), __entry->tail, __entry->buddy ? 1 << __entry->buddy : 0) ); TRACE_EVENT(ext4_mballoc_prealloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->result_logical = ac->ac_b_ex.fe_logical; __entry->result_start = ac->ac_b_ex.fe_start; __entry->result_group = ac->ac_b_ex.fe_group; __entry->result_len = ac->ac_b_ex.fe_len; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical) ); DECLARE_EVENT_CLASS(ext4__mballoc, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ino = inode ? inode->i_ino : 0; __entry->result_start = start; __entry->result_group = group; __entry->result_len = len; ), TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, __entry->result_len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); TRACE_EVENT(ext4_forget, TP_PROTO(struct inode *inode, int is_metadata, __u64 block), TP_ARGS(inode, is_metadata, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( int, is_metadata ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->is_metadata = is_metadata; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->is_metadata, __entry->block) ); TRACE_EVENT(ext4_da_update_reserve_space, TP_PROTO(struct inode *inode, int used_blocks, int quota_claim), TP_ARGS(inode, used_blocks, quota_claim), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->quota_claim = quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " "reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->quota_claim) ); TRACE_EVENT(ext4_da_reserve_space, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, TP_PROTO(struct inode *inode, int freed_blocks), TP_ARGS(inode, freed_blocks), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; ), TP_printk("dev %d,%d group %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); TRACE_EVENT(ext4_read_block_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch), TP_ARGS(sb, group, prefetch), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) __field( bool, prefetch ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; __entry->prefetch = prefetch; ), TP_printk("dev %d,%d group %u prefetch %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group, __entry->prefetch) ); TRACE_EVENT(ext4_direct_IO_enter, TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), TP_ARGS(inode, offset, len, rw), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned long, len ) __field( int, rw ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->len = len; __entry->rw = rw; ), TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->rw) ); TRACE_EVENT(ext4_direct_IO_exit, TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret), TP_ARGS(inode, offset, len, rw, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned long, len ) __field( int, rw ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->len = len; __entry->rw = rw; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->rw, __entry->ret) ); DECLARE_EVENT_CLASS(ext4__fallocate_mode, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, offset ) __field( loff_t, len ) __field( int, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len, show_falloc_mode(__entry->mode)) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); TRACE_EVENT(ext4_fallocate_exit, TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, blocks ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->blocks = max_blocks; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->blocks, __entry->ret) ); TRACE_EVENT(ext4_unlink_enter, TP_PROTO(struct inode *parent, struct dentry *dentry), TP_ARGS(parent, dentry), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( loff_t, size ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->parent = parent->i_ino; __entry->size = d_inode(dentry)->i_size; ), TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) ); TRACE_EVENT(ext4_unlink_exit, TP_PROTO(struct dentry *dentry, int ret), TP_ARGS(dentry, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); DECLARE_EVENT_CLASS(ext4__truncate, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->blocks = inode->i_blocks; ), TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* 'ux' is the unwritten extent. */ TRACE_EVENT(ext4_ext_convert_to_initialized_enter, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux), TP_ARGS(inode, map, ux), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u " "u_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk) ); /* * 'ux' is the unwritten extent. * 'ix' is the initialized extent to which blocks are transferred. */ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux, struct ext4_extent *ix), TP_ARGS(inode, map, ux, ix), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) __field( ext4_lblk_t, i_lblk ) __field( unsigned, i_len ) __field( ext4_fsblk_t, i_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); __entry->i_lblk = le32_to_cpu(ix->ee_block); __entry->i_len = ext4_ext_get_actual_len(ix); __entry->i_pblk = ext4_ext_pblock(ix); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u " "u_lblk %u u_len %u u_pblk %llu " "i_lblk %u i_len %u i_pblk %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk, __entry->i_lblk, __entry->i_len, __entry->i_pblk) ); DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; __entry->flags = flags; ), TP_printk("dev %d,%d ino %lu lblk %u len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_map_flags(__entry->flags)) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, flags ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( unsigned int, mflags ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->pblk = map->m_pblk; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->mflags = map->m_flags; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu flags %s lblk %u pblk %llu len %u " "mflags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_map_flags(__entry->flags), __entry->lblk, __entry->pblk, __entry->len, show_mflags(__entry->mflags), __entry->ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); TRACE_EVENT(ext4_ext_load_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk), TP_ARGS(inode, lblk, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, TP_PROTO(struct super_block *sb, unsigned long ino), TP_ARGS(sb, ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ino = ino; ), TP_printk("dev %d,%d ino %ld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_journal_start, TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks, int revoke_creds, unsigned long IP), TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field(unsigned long, ip ) __field( int, blocks ) __field( int, rsv_blocks ) __field( int, revoke_creds ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; __entry->rsv_blocks = rsv_blocks; __entry->revoke_creds = revoke_creds; ), TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d, " "caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, __entry->revoke_creds, (void *)__entry->ip) ); TRACE_EVENT(ext4_journal_start_reserved, TP_PROTO(struct super_block *sb, int blocks, unsigned long IP), TP_ARGS(sb, blocks, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field(unsigned long, ip ) __field( int, blocks ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; ), TP_printk("dev %d,%d blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, (void *)__entry->ip) ); DECLARE_EVENT_CLASS(ext4__trim, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len), TP_STRUCT__entry( __field( int, dev_major ) __field( int, dev_minor ) __field( __u32, group ) __field( int, start ) __field( int, len ) ), TP_fast_assign( __entry->dev_major = MAJOR(sb->s_dev); __entry->dev_minor = MINOR(sb->s_dev); __entry->group = group; __entry->start = start; __entry->len = len; ), TP_printk("dev %d,%d group %u, start %d, len %d", __entry->dev_major, __entry->dev_minor, __entry->group, __entry->start, __entry->len) ); DEFINE_EVENT(ext4__trim, ext4_trim_extent, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); DEFINE_EVENT(ext4__trim, ext4_trim_all_free, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); TRACE_EVENT(ext4_ext_handle_unwritten_extents, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags, unsigned int allocated, ext4_fsblk_t newblock), TP_ARGS(inode, map, flags, allocated, newblock), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( unsigned int, allocated ) __field( ext4_fsblk_t, newblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->allocated = allocated; __entry->newblk = newblock; ), TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %s " "allocated %d newblock %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_map_flags(__entry->flags), (unsigned int) __entry->allocated, (unsigned long long) __entry->newblk) ); TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret), TP_ARGS(sb, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( int, ret ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = map->m_flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->ret = ret; ), TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_mflags(__entry->flags), __entry->ret) ); TRACE_EVENT(ext4_ext_put_in_cache, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, ext4_fsblk_t start), TP_ARGS(inode, lblk, len, start), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( ext4_fsblk_t, start ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; __entry->start = start; ), TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->len, (unsigned long long) __entry->start) ); TRACE_EVENT(ext4_ext_in_cache, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret), TP_ARGS(inode, lblk, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu lblk %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->ret) ); TRACE_EVENT(ext4_find_delalloc_range, TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to, int reverse, int found, ext4_lblk_t found_blk), TP_ARGS(inode, from, to, reverse, found, found_blk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) __field( int, reverse ) __field( int, found ) __field( ext4_lblk_t, found_blk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->from = from; __entry->to = to; __entry->reverse = reverse; __entry->found = found; __entry->found_blk = found_blk; ), TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d " "(blk = %u)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->from, (unsigned) __entry->to, __entry->reverse, __entry->found, (unsigned) __entry->found_blk) ); TRACE_EVENT(ext4_get_reserved_cluster_alloc, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len), TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu lblk %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_ext_show_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, unsigned short len), TP_ARGS(inode, lblk, pblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned short, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, (unsigned short) __entry->len) ); TRACE_EVENT(ext4_remove_blocks, TP_PROTO(struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_fsblk_t to, struct partial_cluster *pc), TP_ARGS(inode, ex, from, to, pc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) __field( ext4_fsblk_t, ee_pblk ) __field( ext4_lblk_t, ee_lblk ) __field( unsigned short, ee_len ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->from = from; __entry->to = to; __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_len = ext4_ext_get_actual_len(ex); __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; ), TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" "from %u to %u partial [pclu %lld lblk %u state %d]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (unsigned) __entry->from, (unsigned) __entry->to, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state) ); TRACE_EVENT(ext4_ext_rm_leaf, TP_PROTO(struct inode *inode, ext4_lblk_t start, struct ext4_extent *ex, struct partial_cluster *pc), TP_ARGS(inode, start, ex, pc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, ee_lblk ) __field( ext4_fsblk_t, ee_pblk ) __field( short, ee_len ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_len = ext4_ext_get_actual_len(ex); __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; ), TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" "partial [pclu %lld lblk %u state %d]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state) ); TRACE_EVENT(ext4_ext_rm_idx, TP_PROTO(struct inode *inode, ext4_fsblk_t pblk), TP_ARGS(inode, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; ), TP_printk("dev %d,%d ino %lu index_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long long) __entry->pblk) ); TRACE_EVENT(ext4_ext_remove_space, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth), TP_ARGS(inode, start, end, depth), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->depth = depth; ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth) ); TRACE_EVENT(ext4_ext_remove_space_done, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth, struct partial_cluster *pc, __le16 eh_entries), TP_ARGS(inode, start, end, depth, pc, eh_entries), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state ) __field( unsigned short, eh_entries ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->depth = depth; __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; __entry->eh_entries = le16_to_cpu(eh_entries); ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d " "partial [pclu %lld lblk %u state %d] " "remaining_entries %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state, (unsigned short) __entry->eh_entries) ); DECLARE_EVENT_CLASS(ext4__es_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); TRACE_EVENT(ext4_es_remove_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, lblk ) __field( loff_t, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu es [%lld/%lld)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_es_find_extent_range_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_find_extent_range_exit, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); TRACE_EVENT(ext4_es_lookup_extent_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_lookup_extent_exit, TP_PROTO(struct inode *inode, struct extent_status *es, int found), TP_ARGS(inode, es, found), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) __field( int, found ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); __entry->found = found; ), TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->found, __entry->lblk, __entry->len, __entry->found ? __entry->pblk : 0, show_extent_status(__entry->found ? __entry->status : 0)) ); DECLARE_EVENT_CLASS(ext4__es_shrink_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_to_scan ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_to_scan = nr_to_scan; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_to_scan, __entry->cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); TRACE_EVENT(ext4_es_shrink_scan_exit, TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt), TP_ARGS(sb, nr_shrunk, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->cache_cnt) ); TRACE_EVENT(ext4_collapse_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_insert_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_es_shrink, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, int nr_skipped, int retried), TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( unsigned long long, scan_time ) __field( int, nr_skipped ) __field( int, retried ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->scan_time = div_u64(scan_time, 1000); __entry->nr_skipped = nr_skipped; __entry->retried = retried; ), TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " "nr_skipped %d retried %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->scan_time, __entry->nr_skipped, __entry->retried) ); TRACE_EVENT(ext4_es_insert_delayed_block, TP_PROTO(struct inode *inode, struct extent_status *es, bool allocated), TP_ARGS(inode, es, allocated), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) __field( bool, allocated ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); __entry->allocated = allocated; ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s " "allocated %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status), __entry->allocated) ); /* fsmap traces */ DECLARE_EVENT_CLASS(ext4_fsmap_class, TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, u64 owner), TP_ARGS(sb, keydev, agno, bno, len, owner), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u32, agno) __field(u64, bno) __field(u64, len) __field(u64, owner) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(keydev); __entry->agno = agno; __entry->bno = bno; __entry->len = len; __entry->owner = owner; ), TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->agno, __entry->bno, __entry->len, __entry->owner) ) #define DEFINE_FSMAP_EVENT(name) \ DEFINE_EVENT(ext4_fsmap_class, name, \ TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \ u64 owner), \ TP_ARGS(sb, keydev, agno, bno, len, owner)) DEFINE_FSMAP_EVENT(ext4_fsmap_low_key); DEFINE_FSMAP_EVENT(ext4_fsmap_high_key); DEFINE_FSMAP_EVENT(ext4_fsmap_mapping); DECLARE_EVENT_CLASS(ext4_getfsmap_class, TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), TP_ARGS(sb, fsmap), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u64, block) __field(u64, len) __field(u64, owner) __field(u64, flags) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(fsmap->fmr_device); __entry->block = fsmap->fmr_physical; __entry->len = fsmap->fmr_length; __entry->owner = fsmap->fmr_owner; __entry->flags = fsmap->fmr_flags; ), TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->block, __entry->len, __entry->owner, __entry->flags) ) #define DEFINE_GETFSMAP_EVENT(name) \ DEFINE_EVENT(ext4_getfsmap_class, name, \ TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \ TP_ARGS(sb, fsmap)) DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); TRACE_EVENT(ext4_shutdown, TP_PROTO(struct super_block *sb, unsigned long flags), TP_ARGS(sb, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned, flags ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = flags; ), TP_printk("dev %d,%d flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags) ); TRACE_EVENT(ext4_error, TP_PROTO(struct super_block *sb, const char *function, unsigned int line), TP_ARGS(sb, function, line), TP_STRUCT__entry( __field( dev_t, dev ) __field( const char *, function ) __field( unsigned, line ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->function = function; __entry->line = line; ), TP_printk("dev %d,%d function %s line %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->function, __entry->line) ); TRACE_EVENT(ext4_prefetch_bitmaps, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_group_t next, unsigned int prefetch_ios), TP_ARGS(sb, group, next, prefetch_ios), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) __field( __u32, next ) __field( __u32, ios ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; __entry->next = next; __entry->ios = prefetch_ios; ), TP_printk("dev %d,%d group %u next %u ios %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group, __entry->next, __entry->ios) ); TRACE_EVENT(ext4_lazy_itable_init, TP_PROTO(struct super_block *sb, ext4_group_t group), TP_ARGS(sb, group), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; ), TP_printk("dev %d,%d group %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); TRACE_EVENT(ext4_fc_replay_scan, TP_PROTO(struct super_block *sb, int error, int off), TP_ARGS(sb, error, off), TP_STRUCT__entry( __field(dev_t, dev) __field(int, error) __field(int, off) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->error = error; __entry->off = off; ), TP_printk("FC scan pass on dev %d,%d: error %d, off %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->error, __entry->off) ); TRACE_EVENT(ext4_fc_replay, TP_PROTO(struct super_block *sb, int tag, int ino, int priv1, int priv2), TP_ARGS(sb, tag, ino, priv1, priv2), TP_STRUCT__entry( __field(dev_t, dev) __field(int, tag) __field(int, ino) __field(int, priv1) __field(int, priv2) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->tag = tag; __entry->ino = ino; __entry->priv1 = priv1; __entry->priv2 = priv2; ), TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tag, __entry->ino, __entry->priv1, __entry->priv2) ); TRACE_EVENT(ext4_fc_commit_start, TP_PROTO(struct super_block *sb), TP_ARGS(sb), TP_STRUCT__entry( __field(dev_t, dev) ), TP_fast_assign( __entry->dev = sb->s_dev; ), TP_printk("fast_commit started on dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) ); TRACE_EVENT(ext4_fc_commit_stop, TP_PROTO(struct super_block *sb, int nblks, int reason), TP_ARGS(sb, nblks, reason), TP_STRUCT__entry( __field(dev_t, dev) __field(int, nblks) __field(int, reason) __field(int, num_fc) __field(int, num_fc_ineligible) __field(int, nblks_agg) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nblks = nblks; __entry->reason = reason; __entry->num_fc = EXT4_SB(sb)->s_fc_stats.fc_num_commits; __entry->num_fc_ineligible = EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks; ), TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nblks, __entry->reason, __entry->num_fc, __entry->num_fc_ineligible, __entry->nblks_agg) ); #define FC_REASON_NAME_STAT(reason) \ show_fc_reason(reason), \ __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] TRACE_EVENT(ext4_fc_stats, TP_PROTO(struct super_block *sb), TP_ARGS(sb), TP_STRUCT__entry( __field(dev_t, dev) __field(struct ext4_sb_info *, sbi) __field(int, count) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->sbi = EXT4_SB(sb); ), TP_printk("dev %d:%d fc ineligible reasons:\n" "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " "num_commits:%ld, ineligible: %ld, numblks: %ld", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), __entry->sbi->s_fc_stats.fc_num_commits, __entry->sbi->s_fc_stats.fc_ineligible_commits, __entry->sbi->s_fc_stats.fc_numblks) ); #define DEFINE_TRACE_DENTRY_EVENT(__type) \ TRACE_EVENT(ext4_fc_track_##__type, \ TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ \ TP_ARGS(inode, dentry, ret), \ \ TP_STRUCT__entry( \ __field(dev_t, dev) \ __field(int, ino) \ __field(int, error) \ ), \ \ TP_fast_assign( \ __entry->dev = inode->i_sb->s_dev; \ __entry->ino = inode->i_ino; \ __entry->error = ret; \ ), \ \ TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \ MAJOR(__entry->dev), MINOR(__entry->dev), \ __entry->ino, __entry->error, \ #__type) \ ) DEFINE_TRACE_DENTRY_EVENT(create); DEFINE_TRACE_DENTRY_EVENT(link); DEFINE_TRACE_DENTRY_EVENT(unlink); TRACE_EVENT(ext4_fc_track_inode, TP_PROTO(struct inode *inode, int ret), TP_ARGS(inode, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(int, ino) __field(int, error) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->error = ret; ), TP_printk("dev %d:%d, inode %d, error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error) ); TRACE_EVENT(ext4_fc_track_range, TP_PROTO(struct inode *inode, long start, long end, int ret), TP_ARGS(inode, start, end, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(int, ino) __field(long, start) __field(long, end) __field(int, error) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->error = ret; ), TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error, __entry->start, __entry->end) ); #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 /* SPDX-License-Identifier: GPL-2.0 */ /* * Connection state tracking for netfilter. This is separated from, * but required by, the (future) NAT layer; it can also be used by an iptables * extension. * * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> * - generalize L3 protocol dependent part. * * Derived from include/linux/netfiter_ipv4/ip_conntrack.h */ #ifndef _NF_CONNTRACK_H #define _NF_CONNTRACK_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_dccp.h> #include <linux/netfilter/nf_conntrack_sctp.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <net/netfilter/nf_conntrack_tuple.h> struct nf_ct_udp { unsigned long stream_ts; }; /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_udp udp; struct nf_ct_gre gre; unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; struct nf_conntrack_net { unsigned int users4; unsigned int users6; unsigned int users_bridge; }; #include <linux/types.h> #include <linux/skbuff.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; spinlock_t lock; /* jiffies32 when this ct is considered dead */ u32 timeout; #ifdef CONFIG_NF_CONNTRACK_ZONES struct nf_conntrack_zone zone; #endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) struct hlist_node nat_bysource; #endif /* all members below initialized via memset */ struct { } __nfct_init_offset; /* If we were expected by an expectation, this will be it */ struct nf_conn *master; #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK u_int32_t secmark; #endif /* Extensions */ struct nf_ct_ext *ext; /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { return container_of(hash, struct nf_conn, tuplehash[hash->tuple.dst.dir]); } static inline u_int16_t nf_ct_l3num(const struct nf_conn *ct) { return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; } static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) { return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; } #define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) extern struct net init_net; static inline struct net *nf_ct_net(const struct nf_conn *ct) { return read_pnet(&ct->ct_net); } /* Alter reply tuple (maybe alter helper). */ void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply); /* Is this tuple taken? (ignoring any belonging to the given conntrack). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { unsigned long nfct = skb_get_nfct(skb); *ctinfo = nfct & NFCT_INFOMASK; return (struct nf_conn *)(nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { WARN_ON(!ct); nf_conntrack_put(&ct->ct_general); } /* Protocol module loading */ int nf_ct_l3proto_try_module_get(unsigned short l3proto); void nf_ct_l3proto_module_put(unsigned short l3proto); /* load module; enable/disable conntrack in this namespace */ int nf_ct_netns_get(struct net *net, u8 nfproto); void nf_ct_netns_put(struct net *net, u8 nfproto); /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) */ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); int nf_conntrack_hash_check_insert(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, u32 extra_jiffies, bool do_acct); /* Refresh conntrack for this many jiffies and do accounting */ static inline void nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, u32 extra_jiffies) { __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); } /* Refresh conntrack for this many jiffies */ static inline void nf_ct_refresh(struct nf_conn *ct, const struct sk_buff *skb, u32 extra_jiffies) { __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); } /* kill conntrack and do accounting */ bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { return nf_ct_delete(ct, 0, 0); } /* Set all unconfirmed conntrack as dying */ void nf_ct_unconfirmed_destroy(struct net *); /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report); /* also set unconfirmed conntracks as dying. Only use in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data); struct nf_conntrack_zone; void nf_conntrack_free(struct nf_conn *ct); struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); static inline int nf_ct_is_template(const struct nf_conn *ct) { return test_bit(IPS_TEMPLATE_BIT, &ct->status); } /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(const struct nf_conn *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } static inline int nf_ct_is_dying(const struct nf_conn *ct) { return test_bit(IPS_DYING_BIT, &ct->status); } /* Packet is received from loopback */ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) { return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) /* Set an arbitrary timeout large enough not to ever expire, this save * us a check for the IPS_OFFLOAD_BIT from the packet path via * nf_ct_is_expired(). */ static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; extern seqcount_spinlock_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; /* must be called with rcu read lock held */ static inline void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) { struct hlist_nulls_head *hptr; unsigned int sequence, hsz; do { sequence = read_seqcount_begin(&nf_conntrack_generation); hsz = nf_conntrack_htable_size; hptr = nf_conntrack_hash; } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); *hash = hptr; *hsize = hsz; } struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); u32 nf_ct_get_id(const struct nf_conn *ct); static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { skb_set_nfct(skb, (unsigned long)ct | info); } #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) #endif /* _NF_CONNTRACK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PERCPU_COUNTER_H #define _LINUX_PERCPU_COUNTER_H /* * A simple "approximate counter" for use in ext2 and ext3 superblocks. * * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. */ #include <linux/spinlock.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/threads.h> #include <linux/percpu.h> #include <linux/types.h> #include <linux/gfp.h> #ifdef CONFIG_SMP struct percpu_counter { raw_spinlock_t lock; s64 count; #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif s32 __percpu *counters; }; extern int percpu_counter_batch; int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); #define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * It is possible for the percpu_counter_read() to return a small negative * number for some counter which should never be negative. * */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { /* Prevent reloads of fbc->count */ s64 ret = READ_ONCE(fbc->count); if (ret >= 0) return ret; return 0; } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return (fbc->counters != NULL); } #else /* !CONFIG_SMP */ struct percpu_counter { s64 count; }; static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { fbc->count = amount; return 0; } static inline void percpu_counter_destroy(struct percpu_counter *fbc) { } static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { fbc->count = amount; } static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { if (fbc->count > rhs) return 1; else if (fbc->count < rhs) return -1; else return 0; } static inline int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { return percpu_counter_compare(fbc, rhs); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { preempt_disable(); fbc->count += amount; preempt_enable(); } static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { percpu_counter_add(fbc, amount); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * percpu_counter is intended to track positive numbers. In the UP case the * number should never be negative. */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { return fbc->count; } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { return percpu_counter_read_positive(fbc); } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return percpu_counter_read(fbc); } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; } static inline void percpu_counter_sync(struct percpu_counter *fbc) { } #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) { percpu_counter_add(fbc, 1); } static inline void percpu_counter_dec(struct percpu_counter *fbc) { percpu_counter_add(fbc, -1); } static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) { percpu_counter_add(fbc, -amount); } #endif /* _LINUX_PERCPU_COUNTER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NSPROXY_H #define _LINUX_NSPROXY_H #include <linux/spinlock.h> #include <linux/sched.h> struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; struct cgroup_namespace; struct fs_struct; /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * * The pid namespace is an exception -- it's accessed using * task_active_pid_ns. The pid namespace here is the * namespace that children will use. * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. * * The nsproxy is shared by tasks which share all namespaces. * As soon as a single namespace is cloned or unshared, the * nsproxy is copied. */ struct nsproxy { atomic_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; struct time_namespace *time_ns; struct time_namespace *time_ns_for_children; struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. * * If a new user namespace is requested cred will * point to a modifiable set of credentials. If a pointer * to a modifiable set is needed nsset_cred() must be * used and tested. */ struct nsset { unsigned flags; struct nsproxy *nsproxy; struct fs_struct *fs; const struct cred *cred; }; static inline struct cred *nsset_cred(struct nsset *set) { if (set->flags & CLONE_NEWUSER) return (struct cred *)set->cred; return NULL; } /* * the namespaces access rules are: * * 1. only current task is allowed to change tsk->nsproxy pointer or * any pointer on the nsproxy itself. Current must hold the task_lock * when changing tsk->nsproxy. * * 2. when accessing (i.e. reading) current task's namespaces - no * precautions should be taken - just dereference the pointers * * 3. the access to other task namespaces is performed like this * task_lock(task); * nsproxy = task->nsproxy; * if (nsproxy != NULL) { * / * * * work with the namespaces here * * e.g. get the reference on one of them * * / * } / * * * NULL task->nsproxy means that this task is * * almost dead (zombie) * * / * task_unlock(task); * */ int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { if (atomic_dec_and_test(&ns->count)) { free_nsproxy(ns); } } static inline void get_nsproxy(struct nsproxy *ns) { atomic_inc(&ns->count); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/path.h> #include <linux/fcntl.h> #include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; #define MAXSYMLINKS 40 /* * Type of the last component on LOOKUP_PARENT */ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ #define LOOKUP_DIRECTORY 0x0002 /* require a directory */ #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ #define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ /* These tell filesystem methods that we are dealing with the final component... */ #define LOOKUP_OPEN 0x0100 /* ... in open */ #define LOOKUP_CREATE 0x0200 /* ... in object creation */ #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ /* internal use only */ #define LOOKUP_PARENT 0x0010 #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_ROOT_GRABBED 0x0008 /* Scoping flags for lookup. */ #define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ #define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) extern int path_pts(struct path *path); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { return user_path_at_empty(dfd, name, flags, path, NULL); } extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); extern int follow_down(struct path *); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); extern int __must_check nd_jump_link(struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { ((char *) name)[min(len, maxlen)] = '\0'; } /** * retry_estale - determine whether the caller should retry an operation * @error: the error that would currently be returned * @flags: flags being used for next lookup attempt * * Check to see if the error code was -ESTALE, and then determine whether * to retry the call based on whether "flags" already has LOOKUP_REVAL set. * * Returns true if the caller should try the operation again. */ static inline bool retry_estale(const long error, const unsigned int flags) { return error == -ESTALE && !(flags & LOOKUP_REVAL); } #endif /* _LINUX_NAMEI_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IVERSION_H #define _LINUX_IVERSION_H #include <linux/fs.h> /* * The inode->i_version field: * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear different to observers if there was a change to the inode's data or * metadata since it was last queried. * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the * inode. If it's different then something has changed. Observers cannot infer * anything about the nature or magnitude of the changes from the value, only * that the inode has changed in some fashion. * * Not all filesystems properly implement the i_version counter. Subsystems that * want to use i_version field on an inode should first check whether the * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). * * Those that set SB_I_VERSION will automatically have their i_version counter * incremented on writes to normal files. If the SB_I_VERSION is not set, then * the VFS will not touch it on writes, and the filesystem can use it how it * wishes. Note that the filesystem is always responsible for updating the * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). * We consider these sorts of filesystems to have a kernel-managed i_version. * * It may be impractical for filesystems to keep i_version updates atomic with * respect to the changes that cause them. They should, however, guarantee * that i_version updates are never visible before the changes that caused * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * * This implementation uses the low bit in the i_version field as a flag to * track when the value has been queried. If it has not been queried since it * was last incremented, we can skip the increment in most cases. * * In the event that we're updating the ctime, we will usually go ahead and * bump the i_version anyway. Since that has to go to stable storage in some * fashion, we might as well increment it as well. * * With this implementation, the value should always appear to observers to * increase over time if the file has changed. It's recommended to use * inode_eq_iversion() helper to compare values. * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. * * Persistently storing the i_version * ---------------------------------- * Queries of the i_version field are not gated on them hitting the backing * store. It's always possible that the host could crash after allowing * a query of the value but before it has made it to disk. * * To mitigate this problem, filesystems should always use * inode_set_iversion_queried when loading an existing inode from disk. This * ensures that the next attempted inode increment will result in the value * changing. * * Storing the value to disk therefore does not count as a query, so those * filesystems should use inode_peek_iversion to grab the value to be stored. * There is no need to flag the value as having been queried in that case. */ /* * We borrow the lowest bit in the i_version to use as a flag to tell whether * it has been queried since we last incremented it. If it has, then we must * increment it on the next change. After that, we can clear the flag and * avoid incrementing it again until it has again been queried. */ #define I_VERSION_QUERIED_SHIFT (1) #define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) #define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void inode_set_iversion_raw(struct inode *inode, u64 val) { atomic64_set(&inode->i_version, val); } /** * inode_peek_iversion_raw - grab a "raw" iversion value * @inode: inode from which i_version should be read * * Grab a "raw" inode->i_version value and return it. The i_version is not * flagged or converted in any way. This is mostly used to access a self-managed * i_version. * * With those filesystems, we want to treat the i_version as an entirely * opaque value. */ static inline u64 inode_peek_iversion_raw(const struct inode *inode) { return atomic64_read(&inode->i_version); } /** * inode_set_max_iversion_raw - update i_version new value is larger * @inode: inode to set * @val: new i_version to set * * Some self-managed filesystems (e.g Ceph) will only update the i_version * value if the new value is larger than the one we already have. */ static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { u64 cur, old; cur = inode_peek_iversion_raw(inode); for (;;) { if (cur > val) break; old = atomic64_cmpxchg(&inode->i_version, cur, val); if (likely(old == cur)) break; cur = old; } } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for filesystems with * a kernel-managed i_version, for initializing a newly-created inode from * scratch. * * In this case, we do not set the QUERIED flag since we know that this value * has never been queried. */ static inline void inode_set_iversion(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val, and flag it for increment on the next * change. * * Filesystems that persistently store the i_version on disk should use this * when loading an existing inode from disk. * * When loading in an i_version value from a backing store, we can't be certain * that it wasn't previously viewed before being stored. Thus, we must assume * that it was, to ensure that we don't end up handing out the same value for * different versions of the same inode. */ static inline void inode_set_iversion_queried(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | I_VERSION_QUERIED); } /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * * If "force" is set or the QUERIED flag is set, then ensure that we increment * the value, and clear the queried flag. * * In the common case where neither is set, then we can return "false" without * updating i_version. * * If this function returns false, and no other metadata has changed, then we * can avoid logging the metadata. */ static inline bool inode_maybe_inc_iversion(struct inode *inode, bool force) { u64 cur, old, new; /* * The i_version field is not strictly ordered with any other inode * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * * Here, we add full memory barriers to ensure that any de-facto * ordering with other info is preserved. * * This barrier pairs with the barrier in inode_query_iversion() */ smp_mb(); cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) return false; /* Since lowest bit is flag, add 2 to avoid it */ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return true; } /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated * * Forcbily increment the i_version field. This always results in a change to * the observable value. */ static inline void inode_inc_iversion(struct inode *inode) { inode_maybe_inc_iversion(inode, true); } /** * inode_iversion_need_inc - is the i_version in need of being incremented? * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** * inode_inc_iversion_raw - forcibly increment raw i_version * @inode: inode that needs to be updated * * Forcbily increment the raw i_version field. This always results in a change * to the raw value. * * NFS will use the i_version field to store the value from the server. It * mostly treats it as opaque, but in the case where it holds a write * delegation, it must increment the value itself. This function does that. */ static inline void inode_inc_iversion_raw(struct inode *inode) { atomic64_inc(&inode->i_version); } /** * inode_peek_iversion - read i_version without flagging it to be incremented * @inode: inode from which i_version should be read * * Read the inode i_version counter for an inode without registering it as a * query. * * This is typically used by local filesystems that need to store an i_version * on disk. In that situation, it's not necessary to flag it as having been * viewed, as the result won't be used to gauge changes from that point. */ static inline u64 inode_peek_iversion(const struct inode *inode) { return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /** * inode_query_iversion - read i_version for later use * @inode: inode from which i_version should be read * * Read the inode i_version counter. This should be used by callers that wish * to store the returned i_version for later comparison. This will guarantee * that a later query of the i_version will result in a different value if * anything has changed. * * In this implementation, we fetch the current value, set the QUERIED flag and * then try to swap it into place with a cmpxchg, if it wasn't already set. If * that fails, we try again with the newly fetched value from the cmpxchg. */ static inline u64 inode_query_iversion(struct inode *inode) { u64 cur, old, new; cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* * This barrier (and the implicit barrier in the * cmpxchg below) pairs with the barrier in * inode_maybe_inc_iversion(). */ smp_mb(); break; } new = cur | I_VERSION_QUERIED; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return cur >> I_VERSION_QUERIED_SHIFT; } /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare the current raw i_version counter with a previous one. Returns true * if they are the same or false if they are different. */ static inline bool inode_eq_iversion_raw(const struct inode *inode, u64 old) { return inode_peek_iversion_raw(inode) == old; } /** * inode_eq_iversion - check whether the i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns true if they are * the same, and false if they are different. * * Note that we don't need to set the QUERIED flag in this case, as the value * in the inode is not being recorded for later use. */ static inline bool inode_eq_iversion(const struct inode *inode, u64 old) { return inode_peek_iversion(inode) == old; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include <asm/types.h> #include <linux/bits.h> /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) #else # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) #endif #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include <asm/bitops.h> #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) #define for_each_clear_bit(bit, addr, size) \ for ((bit) = find_first_zero_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) /* same as for_each_clear_bit() but use bit as value to start with */ #define for_each_clear_bit_from(bit, addr, size) \ for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits * @start: bit offset to start search and to store the current iteration offset * @clump: location to store copy of current 8-bit clump * @bits: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_set_clump8(start, clump, bits, size) \ for ((start) = find_first_clump8(&(clump), (bits), (size)); \ (start) < (size); \ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) static inline int get_bitmask_order(unsigned int count) { int order; order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64((__u64)w); } /** * rol64 - rotate a 64-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u64 rol64(__u64 word, unsigned int shift) { return (word << (shift & 63)) | (word >> ((-shift) & 63)); } /** * ror64 - rotate a 64-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u64 ror64(__u64 word, unsigned int shift) { return (word >> (shift & 63)) | (word << ((-shift) & 63)); } /** * rol32 - rotate a 32-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << (shift & 31)) | (word >> ((-shift) & 31)); } /** * ror32 - rotate a 32-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u32 ror32(__u32 word, unsigned int shift) { return (word >> (shift & 31)) | (word << ((-shift) & 31)); } /** * rol16 - rotate a 16-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u16 rol16(__u16 word, unsigned int shift) { return (word << (shift & 15)) | (word >> ((-shift) & 15)); } /** * ror16 - rotate a 16-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u16 ror16(__u16 word, unsigned int shift) { return (word >> (shift & 15)) | (word << ((-shift) & 15)); } /** * rol8 - rotate an 8-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u8 rol8(__u8 word, unsigned int shift) { return (word << (shift & 7)) | (word >> ((-shift) & 7)); } /** * ror8 - rotate an 8-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u8 ror8(__u8 word, unsigned int shift) { return (word >> (shift & 7)) | (word << ((-shift) & 7)); } /** * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit * * This is safe to use for 16- and 8-bit types as well. */ static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; } /** * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; } static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } static inline int get_count_order(unsigned int count) { if (count == 0) return -1; return fls(--count); } /** * get_count_order_long - get order after rounding @l up to power of 2 * @l: parameter * * it is same as get_count_order() but with long type parameter */ static inline int get_count_order_long(unsigned long l) { if (l == 0UL) return -1; return (int)fls_long(--l); } /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * * On 64 bit arches this is a synomyn for __ffs * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ static inline unsigned long __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) return __ffs((u32)(word >> 32)) + 32; #elif BITS_PER_LONG != 64 #error BITS_PER_LONG not 32 or 64 #endif return __ffs((unsigned long)word); } /** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @value: the value to assign */ static __always_inline void assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) set_bit(nr, addr); else clear_bit(nr, addr); } static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) __set_bit(nr, addr); else __clear_bit(nr, addr); } #ifdef __KERNEL__ #ifndef set_mask_bits #define set_mask_bits(ptr, mask, bits) \ ({ \ const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ do { \ old__ = READ_ONCE(*(ptr)); \ new__ = (old__ & ~mask__) | bits__; \ } while (cmpxchg(ptr, old__, new__) != old__); \ \ old__; \ }) #endif #ifndef bit_clear_unless #define bit_clear_unless(ptr, clear, test) \ ({ \ const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ do { \ old__ = READ_ONCE(*(ptr)); \ new__ = old__ & ~clear__; \ } while (!(old__ & test__) && \ cmpxchg(ptr, old__, new__) != old__); \ \ !(old__ & test__); \ }) #endif #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at * @size: The number of bits to search * * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); #endif #endif /* __KERNEL__ */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ #include <stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> #include <linux/cache.h> #include <linux/ratelimit_types.h> extern const char linux_banner[]; extern const char linux_proc_banner[]; extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { switch (buffer[1]) { case '0' ... '7': case 'c': /* KERN_CONT */ return buffer[1]; } } return 0; } static inline const char *printk_skip_level(const char *buffer) { if (printk_get_level(buffer)) return buffer + 2; return buffer; } static inline const char *printk_skip_headers(const char *buffer) { while (printk_get_level(buffer)) buffer = printk_skip_level(buffer); return buffer; } #define CONSOLE_EXT_LOG_MAX 8192 /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET extern int console_printk[]; #define console_loglevel (console_printk[0]) #define default_message_loglevel (console_printk[1]) #define minimum_console_loglevel (console_printk[2]) #define default_console_loglevel (console_printk[3]) static inline void console_silent(void) { console_loglevel = CONSOLE_LOGLEVEL_SILENT; } static inline void console_verbose(void) { if (console_loglevel) console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 extern char devkmsg_log_str[]; struct ctl_table; extern int suppress_printk; struct va_format { const char *fmt; va_list *va; }; /* * FW_BUG * Add this to a message where you are sure the firmware is buggy or behaves * really stupid or out of spec. Be aware that the responsible BIOS developer * should be able to fix this issue or at least get a concrete idea of the * problem by reading your message without the need of looking at the kernel * code. * * Use it for definite and high priority BIOS bugs. * * FW_WARN * Use it for not that clear (e.g. could the kernel messed up things already?) * and medium priority BIOS bugs. * * FW_INFO * Use this one if you want to tell the user or vendor about something * suspicious, but generally harmless related to the firmware. * * Use it for information or very low priority BIOS bugs. */ #define FW_BUG "[Firmware Bug]: " #define FW_WARN "[Firmware Warn]: " #define FW_INFO "[Firmware Info]: " /* * HW_ERR * Add this to a message for hardware errors, so that user can report * it to hardware vendor instead of LKML or software vendor. */ #define HW_ERR "[Hardware Error]: " /* * DEPRECATED * Add this to a message whenever you want to warn user space about the use * of a deprecated aspect of an API so they can stop using it */ #define DEPRECATED "[Deprecated]: " /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format checking. */ #define no_printk(fmt, ...) \ ({ \ if (0) \ printk(fmt, ##__VA_ARGS__); \ 0; \ }) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); extern void printk_nmi_direct_enter(void); extern void printk_nmi_direct_exit(void); #else static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } static inline void printk_nmi_direct_enter(void) { } static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ struct dev_printk_info; #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ __printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; extern int dmesg_restrict; extern int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos); extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } static inline __printf(1, 2) __cold int printk(const char *s, ...) { return 0; } static inline __printf(1, 2) __cold int printk_deferred(const char *s, ...) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec) { return false; } static inline void wake_up_klogd(void) { } static inline char *log_buf_addr_get(void) { return NULL; } static inline u32 log_buf_len_get(void) { return 0; } static inline void log_buf_vmcoreinfo_setup(void) { } static inline void setup_log_buf(int early) { } static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } static inline void dump_stack_print_info(const char *log_lvl) { } static inline void show_regs_print_info(const char *log_lvl) { } static inline void dump_stack(void) { } static inline void printk_safe_flush(void) { } static inline void printk_safe_flush_on_panic(void) { } #endif extern int kptr_restrict; /** * pr_fmt - used by the pr_*() macros to generate the printk format string * @fmt: format string passed from a pr_*() macro * * This macro can be used to generate a unified format string for pr_*() * macros. A common use is to prefix all pr_*() messages in a file with a common * string. For example, defining this at the top of a source file: * * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt * * would prefix all pr_info, pr_emerg... messages in the file with the module * name. */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /** * pr_emerg - Print an emergency-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) /** * pr_alert - Print an alert-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_crit - Print a critical-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_err - Print an error-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) /** * pr_warn - Print a warning-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() * to generate the format string. */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) /** * pr_notice - Print a notice-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) /** * pr_info - Print an info-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /** * pr_cont - Continues a previous log message in the same line. * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CONT loglevel. It should only be * used when continuing a log message with no newline ('\n') enclosed. Otherwise * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /** * pr_devel - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is * defined. Otherwise it does nothing. * * It uses pr_fmt() to generate the format string. */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #include <linux/dynamic_debug.h> /** * pr_debug - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. * * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses * pr_fmt() internally). */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ ({ \ static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_print_once); \ }) #define printk_deferred_once(fmt, ...) \ ({ \ static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk_deferred(fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_print_once); \ }) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case */ #ifdef CONFIG_PRINTK #define printk_ratelimited(fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ printk(fmt, ##__VA_ARGS__); \ }) #else #define printk_ratelimited(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_ratelimited(fmt, ...) \ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_ratelimited(fmt, ...) \ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_ratelimited(fmt, ...) \ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_ratelimited(fmt, ...) \ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ #if defined(DEBUG) #define pr_devel_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define pr_debug_ratelimited(fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif extern const struct file_operations kmsg_fops; enum { DUMP_PREFIX_NONE, DUMP_PREFIX_ADDRESS, DUMP_PREFIX_OFFSET }; extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii); #ifdef CONFIG_PRINTK extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #else static inline void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len) { } #endif #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #else static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } #endif /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @buf: data blob to dump * @len: number of bytes in the @buf * * Calls print_hex_dump(), with log level of KERN_DEBUG, * rowsize of 16, groupsize of 1, and ASCII output included. */ #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FIB_RULES_H #define __NET_FIB_RULES_H #include <linux/types.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> #include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> struct fib_kuid_range { kuid_t start; kuid_t end; }; struct fib_rule { struct list_head list; int iifindex; int oifindex; u32 mark; u32 mark_mask; u32 flags; u32 table; u8 action; u8 l3mdev; u8 proto; u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; struct net *fr_net; refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; struct fib_rule_port_range sport_range; struct fib_rule_port_range dport_range; struct rcu_head rcu; }; struct fib_lookup_arg { void *lookup_ptr; const void *lookup_data; void *result; struct fib_rule *rule; u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { int family; struct list_head list; int rule_size; int addr_size; int unresolved_rules; int nr_goto_rules; unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, struct nlattr **, struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush * the route cache if one exists. */ void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; struct rcu_head rcu; }; struct fib_rule_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_rule *rule; }; #define FRA_GENERIC_POLICY \ [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV] = { .type = NLA_U8 }, \ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ [FRA_PROTOCOL] = { .type = NLA_U8 }, \ [FRA_IP_PROTO] = { .type = NLA_U8 }, \ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } #ifdef CONFIG_NET_L3_MASTER_DEV static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->l3mdev ? arg->table : rule->table; } #else static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->table; } #endif static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) return nla_get_u32(nla[FRA_TABLE]); return frh->table; } static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) { return range->start != 0 && range->end != 0; } static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, __be16 port) { return ntohs(port) >= a->start && ntohs(port) <= a->end; } static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) { return a->start != 0 && a->end != 0 && a->end < 0xffff && a->start <= a->end; } static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, struct fib_rule_port_range *b) { return a->start == b->start && a->end == b->end; } static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) { return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto || fib_rule_port_range_set(&rule->sport_range) || fib_rule_port_range_set(&rule->dport_range)); } struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg)); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 /* * Performance events x86 architecture header * * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> * Copyright (C) 2009 Google, Inc., Stephane Eranian * * For licencing details see kernel-base/COPYING */ #include <linux/perf_event.h> #include <asm/intel_ds.h> /* To enable MSR tracing please use the generic trace points. */ /* * | NHM/WSM | SNB | * register ------------------------------- * | HT | no HT | HT | no HT | *----------------------------------------- * offcore | core | core | cpu | core | * lbr_sel | core | core | cpu | core | * ld_lat | cpu | core | cpu | core | *----------------------------------------- * * Given that there is a small number of shared regs, * we can pre-allocate their slot in the per-cpu * per-core reg tables. */ enum extra_reg_type { EXTRA_REG_NONE = -1, /* not used */ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ EXTRA_REG_FE = 4, /* fe_* */ EXTRA_REG_MAX /* number of entries needed */ }; struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; u64 idxmsk64; }; u64 code; u64 cmask; int weight; int overlap; int flags; unsigned int size; }; static inline bool constraint_match(struct event_constraint *c, u64 ecode) { return ((ecode & c->cmask) - c->code) <= (u64)c->size; } /* * struct hw_perf_event.flags flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ #define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */ #define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */ #define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */ #define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */ #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */ #define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ #define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ #define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ #define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */ #define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */ static inline bool is_topdown_count(struct perf_event *event) { return event->hw.flags & PERF_X86_EVENT_TOPDOWN; } static inline bool is_metric_event(struct perf_event *event) { u64 config = event->attr.config; return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) && ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) && ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX); } static inline bool is_slots_event(struct perf_event *event) { return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS; } static inline bool is_topdown_event(struct perf_event *event) { return is_metric_event(event) || is_slots_event(event); } struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */ struct perf_event *owners[X86_PMC_IDX_MAX]; struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) #define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60) #define PEBS_OUTPUT_OFFSET 61 #define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET) #define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET) #define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD) /* * Flags PEBS can handle without an PMI. * * TID can only be handled by flushing at context switch. * REGS_USER can be handled for events limited to ring 3. * */ #define LARGE_PEBS_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ PERF_SAMPLE_PERIOD) #define PEBS_GP_REGS \ ((1ULL << PERF_REG_X86_AX) | \ (1ULL << PERF_REG_X86_BX) | \ (1ULL << PERF_REG_X86_CX) | \ (1ULL << PERF_REG_X86_DX) | \ (1ULL << PERF_REG_X86_DI) | \ (1ULL << PERF_REG_X86_SI) | \ (1ULL << PERF_REG_X86_SP) | \ (1ULL << PERF_REG_X86_BP) | \ (1ULL << PERF_REG_X86_IP) | \ (1ULL << PERF_REG_X86_FLAGS) | \ (1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ (1ULL << PERF_REG_X86_R10) | \ (1ULL << PERF_REG_X86_R11) | \ (1ULL << PERF_REG_X86_R12) | \ (1ULL << PERF_REG_X86_R13) | \ (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) /* * Per register state. */ struct er_account { raw_spinlock_t lock; /* per-core: protect structure */ u64 config; /* extra MSR config */ u64 reg; /* extra MSR number */ atomic_t ref; /* reference count */ }; /* * Per core/cpu state * * Used to coordinate shared registers between HT threads or * among events on a single PMU. */ struct intel_shared_regs { struct er_account regs[EXTRA_REG_MAX]; int refcnt; /* per-core: #HT threads */ unsigned core_id; /* per-core: core id */ }; enum intel_excl_state_type { INTEL_EXCL_UNUSED = 0, /* counter is unused */ INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */ INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */ }; struct intel_excl_states { enum intel_excl_state_type state[X86_PMC_IDX_MAX]; bool sched_started; /* true if scheduling has started */ }; struct intel_excl_cntrs { raw_spinlock_t lock; struct intel_excl_states states[2]; union { u16 has_exclusive[2]; u32 exclusive_present; }; int refcnt; /* per-core: #HT threads */ unsigned core_id; /* per-core: core id */ }; struct x86_perf_task_context; #define MAX_LBR_ENTRIES 32 enum { LBR_FORMAT_32 = 0x00, LBR_FORMAT_LIP = 0x01, LBR_FORMAT_EIP = 0x02, LBR_FORMAT_EIP_FLAGS = 0x03, LBR_FORMAT_EIP_FLAGS2 = 0x04, LBR_FORMAT_INFO = 0x05, LBR_FORMAT_TIME = 0x06, LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, }; enum { X86_PERF_KFREE_SHARED = 0, X86_PERF_KFREE_EXCL = 1, X86_PERF_KFREE_MAX }; struct cpu_hw_events { /* * Generic x86 PMC bits */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; /* the # of events in the below arrays */ int n_added; /* the # last events in the below arrays; they've never been enabled yet */ int n_txn; /* the # last events in the below arrays; added in the current transaction */ int n_txn_pair; int n_txn_metric; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; int n_excl; /* the number of exclusive events */ unsigned int txn_flags; int is_fake; /* * Intel DebugStore bits */ struct debug_store *ds; void *ds_pebs_vaddr; void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; int n_pebs_via_pt; int pebs_output; /* Current super set of events hardware configuration */ u64 pebs_data_cfg; u64 active_pebs_data_cfg; int pebs_record_size; /* * Intel LBR bits */ int lbr_users; int lbr_pebs_users; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; union { struct er_account *lbr_sel; struct er_account *lbr_ctl; }; u64 br_sel; void *last_task_ctx; int last_log_id; int lbr_select; void *lbr_xsave; /* * Intel host/guest exclude bits */ u64 intel_ctrl_guest_mask; u64 intel_ctrl_host_mask; struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; /* * Intel checkpoint mask */ u64 intel_cp_status; /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB */ struct intel_shared_regs *shared_regs; /* * manage exclusive counter access between hyperthread */ struct event_constraint *constraint_list; /* in enable order */ struct intel_excl_cntrs *excl_cntrs; int excl_thread_id; /* 0 or 1 */ /* * SKL TSX_FORCE_ABORT shadow */ u64 tfa_shadow; /* * Perf Metrics */ /* number of accepted metrics events */ int n_metric; /* * AMD specific bits */ struct amd_nb *amd_nb; /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; struct pmu *pmu; }; #define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ { .idxmsk64 = (n) }, \ .code = (c), \ .size = (e) - (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ .flags = f, \ } #define __EVENT_CONSTRAINT(c, n, m, w, o, f) \ __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f) #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) /* * The constraint_match() function only works for 'simple' event codes * and not for extended (AMD64_EVENTSEL_EVENT) events codes. */ #define EVENT_CONSTRAINT_RANGE(c, e, n, m) \ __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0) #define INTEL_EXCLEVT_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ 0, PERF_X86_EVENT_EXCL) /* * The overlap flag marks event constraints with overlapping counter * masks. This is the case if the counter mask of such an event is not * a subset of any other counter mask of a constraint with an equal or * higher weight, e.g.: * * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); * * The event scheduler may not select the correct counter in the first * cycle because it needs to know which subsequent events will be * scheduled. It may fail to schedule the events then. So we set the * overlap flag for such constraints to give the scheduler a hint which * events to select for counter rescheduling. * * Care must be taken as the rescheduling algorithm is O(n!) which * will increase scheduling cycles for an over-committed system * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros * and its counter masks must be kept at a minimum. */ #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0) /* * Constraint on the Event code. */ #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) /* * Constraint on a range of Event codes */ #define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT) /* * Constraint on the Event code + UMask + fixed-mask * * filter mask to validate fixed counter events. * the following filters disqualify for fixed counters: * - inv * - edge * - cnt-mask * - in_tx * - in_tx_checkpointed * The other filters are supported by fixed counters. * The any-thread option is supported starting with v3. */ #define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED) #define FIXED_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) /* * The special metric counters do not actually exist. They are calculated from * the combination of the FxCtr3 + MSR_PERF_METRICS. * * The special metric counters are mapped to a dummy offset for the scheduler. * The sharing between multiple users of the same metric without multiplexing * is not allowed, even though the hardware supports that in principle. */ #define METRIC_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \ INTEL_ARCH_EVENT_MASK) /* * Constraint on the Event code + UMask */ #define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) /* Constraint on specific umask bit only + event */ #define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c)) /* Like UEVENT_CONSTRAINT, but match flags too */ #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) #define INTEL_EXCLUEVT_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_EXCL) #define INTEL_PLD_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) /* Event constraint, but match on all event flags too. */ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) #define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) /* Check only flags, but allow all event/umask */ #define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) /* Check flags and event code, and set the HSW store flag */ #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* Check flags and event code, and set the HSW load flag */ #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \ __EVENT_CONSTRAINT_RANGE(code, end, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, \ PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) /* Check flags and event code/umask, and set the HSW store flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, \ PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL) /* Check flags and event code/umask, and set the HSW load flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, \ PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) /* Check flags and event code/umask, and set the HSW N/A flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) /* * We define the end marker as having a weight of -1 * to enable blacklisting of events using a counter bitmask * of zero and thus a weight of zero. * The end marker has a weight that cannot possibly be * obtained from counting the bits in the bitmask. */ #define EVENT_CONSTRAINT_END { .weight = -1 } /* * Check for end marker with weight == -1 */ #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->weight != -1; (e)++) /* * Extra registers for specific events. * * Some events need large masks and require external MSRs. * Those extra MSRs end up being shared for all events on * a PMU and sometimes between PMU of sibling HT threads. * In either case, the kernel needs to handle conflicting * accesses to those extra, shared, regs. The data structure * to manage those registers is stored in cpu_hw_event. */ struct extra_reg { unsigned int event; unsigned int msr; u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ .event = (e), \ .msr = (ms), \ .config_mask = (m), \ .valid_mask = (vm), \ .idx = EXTRA_REG_##i, \ .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) #define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK, vm, idx) #define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \ INTEL_UEVENT_EXTRA_REG(c, \ MSR_PEBS_LD_LAT_THRESHOLD, \ 0xffff, \ LDLAT) #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) union perf_capabilities { struct { u64 lbr_format:6; u64 pebs_trap:1; u64 pebs_arch_reg:1; u64 pebs_format:4; u64 smm_freeze:1; /* * PMU supports separate counter range for writing * values > 32bit. */ u64 full_width_write:1; u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; u64 anythread_deprecated:1; }; u64 capabilities; }; struct x86_pmu_quirk { struct x86_pmu_quirk *next; void (*func)(void); }; union x86_pmu_config { struct { u64 event:8, umask:8, usr:1, os:1, edge:1, pc:1, interrupt:1, __reserved1:1, en:1, inv:1, cmask:8, event2:4, __reserved2:4, go:1, ho:1; } bits; u64 value; }; #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value enum { x86_lbr_exclusive_lbr, x86_lbr_exclusive_bts, x86_lbr_exclusive_pt, x86_lbr_exclusive_max, }; /* * struct x86_pmu - generic x86 pmu */ struct x86_pmu { /* * Generic x86 PMC bits */ const char *name; int version; int (*handle_irq)(struct pt_regs *); void (*disable_all)(void); void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); void (*add)(struct perf_event *); void (*del)(struct perf_event *); void (*read)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; int (*addr_offset)(int index, bool eventsel); int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; int num_counters; int num_counters_fixed; int cntval_bits; u64 cntval_mask; union { unsigned long events_maskl; unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; }; int events_mask_len; int apic; u64 max_period; struct event_constraint * (*get_event_constraints)(struct cpu_hw_events *cpuc, int idx, struct perf_event *event); void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); void (*start_scheduling)(struct cpu_hw_events *cpuc); void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); void (*stop_scheduling)(struct cpu_hw_events *cpuc); struct event_constraint *event_constraints; struct x86_pmu_quirk *quirks; int perfctr_second_write; u64 (*limit_period)(struct perf_event *event, u64 l); /* PMI handler bits */ unsigned int late_ack :1, enabled_ack :1, counter_freezing :1; /* * sysfs attrs */ int attr_rdpmc_broken; int attr_rdpmc; struct attribute **format_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); const struct attribute_group **attr_update; unsigned long attr_freeze_on_smi; /* * CPU Hotplug hooks */ int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); void (*check_microcode)(void); void (*sched_task)(struct perf_event_context *ctx, bool sched_in); /* * Intel Arch Perfmon v2+ */ u64 intel_ctrl; union perf_capabilities intel_cap; /* * Intel DebugStore bits */ unsigned int bts :1, bts_active :1, pebs :1, pebs_active :1, pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; u64 rtm_abort_event; /* * Intel LBR */ unsigned int lbr_tos, lbr_from, lbr_to, lbr_info, lbr_nr; /* LBR base regs and size */ union { u64 lbr_sel_mask; /* LBR_SELECT valid bits */ u64 lbr_ctl_mask; /* LBR_CTL valid bits */ }; union { const int *lbr_sel_map; /* lbr_select mappings */ int *lbr_ctl_map; /* LBR_CTL mappings */ }; bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ /* * Intel Architectural LBR CPUID Enumeration */ unsigned int lbr_depth_mask:8; unsigned int lbr_deep_c_reset:1; unsigned int lbr_lip:1; unsigned int lbr_cpl:1; unsigned int lbr_filter:1; unsigned int lbr_call_stack:1; unsigned int lbr_mispred:1; unsigned int lbr_timed_lbr:1; unsigned int lbr_br_type:1; void (*lbr_reset)(void); void (*lbr_read)(struct cpu_hw_events *cpuc); void (*lbr_save)(void *ctx); void (*lbr_restore)(void *ctx); /* * Intel PT/LBR/BTS are exclusive */ atomic_t lbr_exclusive[x86_lbr_exclusive_max]; /* * Intel perf metrics */ u64 (*update_topdown_event)(struct perf_event *event); int (*set_topdown_event_period)(struct perf_event *event); /* * perf task context (i.e. struct perf_event_context::task_ctx_data) * switch helper to bridge calls from perf/core to perf/x86. * See struct pmu::swap_task_ctx() usage for examples; */ void (*swap_task_ctx)(struct perf_event_context *prev, struct perf_event_context *next); /* * AMD bits */ unsigned int amd_nb_constraints : 1; u64 perf_ctr_pair_en; /* * Extra registers for events */ struct extra_reg *extra_regs; unsigned int flags; /* * Intel host/guest support (KVM) */ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); /* * Check period value for PERF_EVENT_IOC_PERIOD ioctl. */ int (*check_period) (struct perf_event *event, u64 period); int (*aux_output_match) (struct perf_event *event); }; struct x86_perf_task_context_opt { int lbr_callstack_users; int lbr_stack_state; int log_id; }; struct x86_perf_task_context { u64 lbr_sel; int tos; int valid_lbrs; struct x86_perf_task_context_opt opt; struct lbr_entry lbr[MAX_LBR_ENTRIES]; }; struct x86_perf_task_context_arch_lbr { struct x86_perf_task_context_opt opt; struct lbr_entry entries[]; }; /* * Add padding to guarantee the 64-byte alignment of the state buffer. * * The structure is dynamically allocated. The size of the LBR state may vary * based on the number of LBR registers. * * Do not put anything after the LBR state. */ struct x86_perf_task_context_arch_lbr_xsave { struct x86_perf_task_context_opt opt; union { struct xregs_state xsave; struct { struct fxregs_state i387; struct xstate_header header; struct arch_lbr_state lbr; } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT))); }; }; #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ .func = func_, \ }; \ __quirk.next = x86_pmu.quirks; \ x86_pmu.quirks = &__quirk; \ } while (0) /* * x86_pmu flags */ #define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */ #define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */ #define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ #define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr #define EVENT_ATTR(_name, _id) \ static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ .id = PERF_COUNT_HW_##_id, \ .event_str = NULL, \ }; #define EVENT_ATTR_STR(_name, v, str) \ static struct perf_pmu_events_attr event_attr_##v = { \ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ .id = 0, \ .event_str = str, \ }; #define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ .id = 0, \ .event_str_noht = noht, \ .event_str_ht = ht, \ } struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) { if (static_cpu_has(X86_FEATURE_ARCH_LBR)) return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt; return &((struct x86_perf_task_context *)ctx)->opt; } static inline bool x86_pmu_has_lbr_callstack(void) { return x86_pmu.lbr_sel_map && x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; } DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); int x86_perf_event_set_period(struct perf_event *event); /* * Generalized hw caching related hw_event table, filled * in on a per model basis. A value of 0 means * 'not supported', -1 means 'hw_event makes no sense on * this CPU', any other value means the raw hw_event * ID. */ #define C(x) PERF_COUNT_HW_CACHE_##x extern u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; extern u64 __read_mostly hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; u64 x86_perf_event_update(struct perf_event *event); static inline unsigned int x86_pmu_config_addr(int index) { return x86_pmu.eventsel + (x86_pmu.addr_offset ? x86_pmu.addr_offset(index, true) : index); } static inline unsigned int x86_pmu_event_addr(int index) { return x86_pmu.perfctr + (x86_pmu.addr_offset ? x86_pmu.addr_offset(index, false) : index); } static inline int x86_pmu_rdpmc_index(int index) { return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); int x86_reserve_hardware(void); void x86_release_hardware(void); int x86_pmu_max_precise(void); void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); int x86_pmu_hw_config(struct perf_event *event); void x86_pmu_disable_all(void); static inline bool is_counter_pair(struct hw_perf_event *hwc) { return hwc->flags & PERF_X86_EVENT_PAIR; } static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); if (hwc->extra_reg.reg) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); /* * Add enabled Merge event on next counter * if large increment event being enabled on this counter */ if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } void x86_pmu_enable_all(int added); int perf_assign_events(struct event_constraint **constraints, int n, int wmin, int wmax, int gpmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; wrmsrl(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); int x86_pmu_handle_irq(struct pt_regs *regs); extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; static inline bool kernel_ip(unsigned long ip) { #ifdef CONFIG_X86_32 return ip > PAGE_OFFSET; #else return (long)ip < 0; #endif } /* * Not all PMUs provide the right context information to place the reported IP * into full context. Specifically segment registers are typically not * supplied. * * Assuming the address is a linear address (it is for IBS), we fake the CS and * vm86 mode using the known zero-based code segment and 'fix up' the registers * to reflect this. * * Intel PEBS/LBR appear to typically provide the effective address, nothing * much we can do about that but pray and treat it like a linear address. */ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) { regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS; if (regs->flags & X86_VM_MASK) regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK); regs->ip = ip; } ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); #else /* CONFIG_CPU_SUP_AMD */ static inline int amd_pmu_init(void) { return 0; } #endif /* CONFIG_CPU_SUP_AMD */ static inline int is_pebs_pt(struct perf_event *event) { return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT); } #ifdef CONFIG_CPU_SUP_INTEL static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) { struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; if (event->attr.freq) return false; hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); return hw_event == bts_event && period == 1; } static inline bool intel_pmu_has_bts(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; return intel_pmu_has_bts_period(event, hwc->sample_period); } int intel_pmu_save_and_restart(struct perf_event *event); struct event_constraint * x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event); extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu); extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); void init_debug_store_on_cpu(int cpu); void fini_debug_store_on_cpu(int cpu); void release_ds_buffers(void); void reserve_ds_buffers(void); void release_lbr_buffers(void); void reserve_lbr_buffers(void); extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; void intel_pmu_enable_bts(u64 config); void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; extern struct event_constraint intel_slm_pebs_event_constraints[]; extern struct event_constraint intel_glm_pebs_event_constraints[]; extern struct event_constraint intel_glp_pebs_event_constraints[]; extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; extern struct event_constraint intel_ivb_pebs_event_constraints[]; extern struct event_constraint intel_hsw_pebs_event_constraints[]; extern struct event_constraint intel_bdw_pebs_event_constraints[]; extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); void intel_pmu_pebs_del(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); void intel_pmu_pebs_disable(struct perf_event *event); void intel_pmu_pebs_enable_all(void); void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); void intel_pmu_auto_reload_read(struct perf_event *event); void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); void intel_ds_init(void); void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, struct perf_event_context *next); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); u64 lbr_from_signext_quirk_wr(u64 val); void intel_pmu_lbr_reset(void); void intel_pmu_lbr_reset_32(void); void intel_pmu_lbr_reset_64(void); void intel_pmu_lbr_add(struct perf_event *event); void intel_pmu_lbr_del(struct perf_event *event); void intel_pmu_lbr_enable_all(bool pmi); void intel_pmu_lbr_disable_all(void); void intel_pmu_lbr_read(void); void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc); void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc); void intel_pmu_lbr_save(void *ctx); void intel_pmu_lbr_restore(void *ctx); void intel_pmu_lbr_init_core(void); void intel_pmu_lbr_init_nhm(void); void intel_pmu_lbr_init_atom(void); void intel_pmu_lbr_init_slm(void); void intel_pmu_lbr_init_snb(void); void intel_pmu_lbr_init_hsw(void); void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); void intel_pmu_arch_lbr_init(void); void intel_pmu_pebs_data_source_nhm(void); void intel_pmu_pebs_data_source_skl(bool pmem); int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); int intel_bts_interrupt(void); void intel_bts_enable_local(void); void intel_bts_disable_local(void); int p4_pmu_init(void); int p6_pmu_init(void); int knc_pmu_init(void); static inline int is_ht_workaround_enabled(void) { return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); } #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) { } static inline void release_ds_buffers(void) { } static inline void release_lbr_buffers(void) { } static inline void reserve_lbr_buffers(void) { } static inline int intel_pmu_init(void) { return 0; } static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { return 0; } static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc) { } static inline int is_ht_workaround_enabled(void) { return 0; } #endif /* CONFIG_CPU_SUP_INTEL */ #if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN)) int zhaoxin_pmu_init(void); #else static inline int zhaoxin_pmu_init(void) { return 0; } #endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP module. * * Version: @(#)ip.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * * Changes: * Mike McLagan : Routing by source */ #ifndef _IP_H #define _IP_H #include <linux/types.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/sockptr.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> #include <net/netns/hash.h> #include <net/lwtunnel.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ extern unsigned int sysctl_fib_sync_mem; extern unsigned int sysctl_fib_sync_mem_min; extern unsigned int sysctl_fib_sync_mem_max; struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) #define IPSKB_XFRM_TRANSFORMED BIT(2) #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; static inline bool ipv4_l3mdev_skb(u16 flags) { return !!(flags & IPSKB_L3SLAVE); } static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; } struct ipcm_cookie { struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; __u8 ttl; __s16 tos; char priority; __u16 gso_size; }; static inline void ipcm_init(struct ipcm_cookie *ipcm) { *ipcm = (struct ipcm_cookie) { .tos = -1 }; } static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, const struct inet_sock *inet) { ipcm_init(ipcm); ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = inet->sk.sk_bound_dev_if; ipcm->addr = inet->inet_saddr; } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) /* return enslaved device index if relevant */ static inline int inet_sdif(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return IPCB(skb)->iif; #endif return 0; } /* Special input handler for packets caught by router alert option. They are selected only by protocol field, and then processed likely local ones; but only if someone wants them! Otherwise, router not running rsvpd will kill RSVP. It is user level problem, what it will make with them. I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), but receiver should be enough clever f.e. to forward mtrace requests, sent to multicast group to reach destination designated router. */ struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; union { void (*destructor)(struct sock *); struct sock *saved_sk; }; struct rcu_head rcu; }; /* IP flags. */ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ #define IP_MF 0x2000 /* Flag: "More Fragments" */ #define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ struct msghdr; struct net_device; struct packet_type; struct rtable; struct sockaddr; int igmp_mc_init(void); /* * Functions provided by ip.c */ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos); int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); struct ip_fraglist_iter { struct sk_buff *frag; struct iphdr *iph; int offset; unsigned int hlen; }; void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, unsigned int hlen, struct ip_fraglist_iter *iter); void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter); static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) { struct sk_buff *skb = iter->frag; iter->frag = skb->next; skb_mark_not_on_list(skb); return skb; } struct ip_frag_state { bool DF; unsigned int hlen; unsigned int ll_rs; unsigned int mtu; unsigned int left; int offset; int ptr; __be16 not_last_frag; }; void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state); struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, struct rtable **rt, unsigned int flags); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, int offset, size_t size, int flags); struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); int ip_send_skb(struct net *net, struct sk_buff *skb); int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); void ip_flush_pending_frames(struct sock *sk); struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, struct inet_cork *cork, unsigned int flags); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); } static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) { return (ipc->tos != -1) ? RT_CONN_FLAGS_TOS(sk, ipc->tos) : RT_CONN_FLAGS(sk); } /* datagram.c */ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) { return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len, u64 transmit_time); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define __IP_ADD_STATS(net, field, val) __SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) #define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off); #else static inline u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset) { return snmp_get_cpu_field(mib, cpu, offct); } static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off) { return snmp_fold_field(mib, offt); } #endif #define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff64[i] += snmp_get_cpu_field64( \ mib_statistic, \ c, stats_list[i].entry, \ offset); \ } \ } #define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff[i] += snmp_get_cpu_field( \ mib_statistic, \ c, stats_list[i].entry); \ } \ } void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { if (!net->ipv4.sysctl_local_reserved_ports) return false; return test_bit(port, net->ipv4.sysctl_local_reserved_ports); } static inline bool sysctl_dev_name_is_allowed(const char *name) { return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < net->ipv4.sysctl_ip_prot_sock; } #else static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { return false; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < PROT_SOCK; } #endif __be32 inet_current_timestamp(void); /* From inetpeer.c */ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; } #ifdef CONFIG_INET #include <net/dst.h> /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ static inline int ip_decrease_ttl(struct iphdr *iph) { u32 check = (__force u32)iph->check; check += (__force u32)htons(0x0100); iph->check = (__force __sum16)(check + (check>=0xFFFF)); return --iph->ttl; } static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *)dst; return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); } static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) { return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct net *net = dev_net(dst->dev); unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); if (!mtu) mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { unsigned int mtu; if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { if (fib_metrics != &dst_default_metrics && refcount_dec_and_test(&fib_metrics->refcnt)) kfree(fib_metrics); } /* ipv4 and ipv6 both use refcounted metrics if it is not the default */ static inline void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) { dst_init_metrics(dst, fib_metrics->metrics, true); if (fib_metrics != &dst_default_metrics) { dst->_metrics |= DST_METRICS_REFCOUNTED; refcount_inc(&fib_metrics->refcnt); } } static inline void ip_dst_metrics_put(struct dst_entry *dst) { struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) kfree(p); } u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { /* This is only to work around buggy Windows95/2000 * VJ compression implementations. If the ID field * does not change, they drop every other packet in * a TCP stream using header compression. */ if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); inet_sk(sk)->inet_id += segs; } else { iph->id = 0; } } else { __ip_select_ident(net, iph, segs); } } static inline void ip_select_ident(struct net *net, struct sk_buff *skb, struct sock *sk) { ip_select_ident_segs(net, skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) { return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len, proto, 0); } /* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store * Equivalent to : flow->v4addrs.src = iph->saddr; * flow->v4addrs.dst = iph->daddr; */ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, const struct iphdr *iph) { BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) { const struct iphdr *iph = skb_gro_network_header(skb); return csum_tcpudp_nofold(iph->saddr, iph->daddr, skb_gro_len(skb), proto, 0); } /* * Map a multicast IP onto multicast MAC for type ethernet. */ static inline void ip_eth_mc_map(__be32 naddr, char *buf) { __u32 addr=ntohl(naddr); buf[0]=0x01; buf[1]=0x00; buf[2]=0x5e; buf[5]=addr&0xFF; addr>>=8; buf[4]=addr&0xFF; addr>>=8; buf[3]=addr&0x7F; } /* * Map a multicast IP onto multicast MAC for type IP-over-InfiniBand. * Leave P_Key as 0 to be filled in by driver. */ static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { __u32 addr; unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; addr = ntohl(naddr); buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x40; /* IPv4 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; buf[10] = 0; buf[11] = 0; buf[12] = 0; buf[13] = 0; buf[14] = 0; buf[15] = 0; buf[19] = addr & 0xff; addr >>= 8; buf[18] = addr & 0xff; addr >>= 8; buf[17] = addr & 0xff; addr >>= 8; buf[16] = addr & 0x0f; } static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) memcpy(buf, broadcast, 4); else memcpy(buf, &naddr, sizeof(naddr)); } #if IS_ENABLED(CONFIG_IPV6) #include <linux/ipv6.h> #endif static __inline__ void inet_reset_saddr(struct sock *sk) { inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); memset(&np->saddr, 0, sizeof(np->saddr)); memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); } #endif } #endif static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; } static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } bool ip_call_ra_chain(struct sk_buff *skb); /* * Functions provided by ip_fragment.c */ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHRT_MAX, IP_DEFRAG_CONNTRACK_OUT, __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD, IP_DEFRAG_AF_PACKET, IP_DEFRAG_MACVLAN, }; /* Return true if the value of 'user' is between 'lower_bond' * and 'upper_bond' inclusively. */ static inline bool ip_defrag_user_in_between(u32 user, enum ip_defrag_users lower_bond, enum ip_defrag_users upper_bond) { return user >= lower_bond && user <= upper_bond; } int ip_defrag(struct net *net, struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user); #else static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { return skb; } #endif /* * Functions provided by ip_forward.c */ int ip_forward(struct sk_buff *skb); /* * Functions provided by ip_options.c */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); static inline int ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb) { return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); int __ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, sockptr_t data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); static inline bool inetdev_valid_mtu(unsigned int mtu) { return likely(mtu >= IPV4_MIN_MTU); } void ip_sock_set_freebind(struct sock *sk); int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_MM_INLINE_H #define LINUX_MM_INLINE_H #include <linux/huge_mm.h> #include <linux/swap.h> /** * page_is_file_lru - should the page be on a file LRU or anon LRU? * @page: the page to test * * Returns 1 if @page is a regular filesystem backed page cache page or a lazily * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by * functions that manipulate the LRU lists, to sort a page onto the right LRU * list. * * We would like to get this info without a page flag, but the state * needs to survive until the page is last deleted from the LRU, which * could be as far down as __page_cache_release. */ static inline int page_is_file_lru(struct page *page) { return !PageSwapBacked(page); } static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, int nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); } static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, int nr_pages) { __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif } static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec, enum lru_list lru) { update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { list_del(&page->lru); update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); } /** * page_lru_base_type - which LRU list type should a page be on? * @page: the page to test * * Used for LRU list index arithmetic. * * Returns the base LRU type - file or anon - @page should be on. */ static inline enum lru_list page_lru_base_type(struct page *page) { if (page_is_file_lru(page)) return LRU_INACTIVE_FILE; return LRU_INACTIVE_ANON; } /** * page_off_lru - which LRU list was page on? clearing its lru flags. * @page: the page to test * * Returns the LRU list a page was on, as an index into the array of LRU * lists; and clears its Unevictable or Active flags, ready for freeing. */ static __always_inline enum lru_list page_off_lru(struct page *page) { enum lru_list lru; if (PageUnevictable(page)) { __ClearPageUnevictable(page); lru = LRU_UNEVICTABLE; } else { lru = page_lru_base_type(page); if (PageActive(page)) { __ClearPageActive(page); lru += LRU_ACTIVE; } } return lru; } /** * page_lru - which LRU list should a page be on? * @page: the page to test * * Returns the LRU list a page should be on, as an index * into the array of LRU lists. */ static __always_inline enum lru_list page_lru(struct page *page) { enum lru_list lru; if (PageUnevictable(page)) lru = LRU_UNEVICTABLE; else { lru = page_lru_base_type(page); if (PageActive(page)) lru += LRU_ACTIVE; } return lru; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_UNALIGNED_ACCESS_OK_H #define _LINUX_UNALIGNED_ACCESS_OK_H #include <linux/kernel.h> #include <asm/byteorder.h> static __always_inline u16 get_unaligned_le16(const void *p) { return le16_to_cpup((__le16 *)p); } static __always_inline u32 get_unaligned_le32(const void *p) { return le32_to_cpup((__le32 *)p); } static __always_inline u64 get_unaligned_le64(const void *p) { return le64_to_cpup((__le64 *)p); } static __always_inline u16 get_unaligned_be16(const void *p) { return be16_to_cpup((__be16 *)p); } static __always_inline u32 get_unaligned_be32(const void *p) { return be32_to_cpup((__be32 *)p); } static __always_inline u64 get_unaligned_be64(const void *p) { return be64_to_cpup((__be64 *)p); } static __always_inline void put_unaligned_le16(u16 val, void *p) { *((__le16 *)p) = cpu_to_le16(val); } static __always_inline void put_unaligned_le32(u32 val, void *p) { *((__le32 *)p) = cpu_to_le32(val); } static __always_inline void put_unaligned_le64(u64 val, void *p) { *((__le64 *)p) = cpu_to_le64(val); } static __always_inline void put_unaligned_be16(u16 val, void *p) { *((__be16 *)p) = cpu_to_be16(val); } static __always_inline void put_unaligned_be32(u32 val, void *p) { *((__be32 *)p) = cpu_to_be32(val); } static __always_inline void put_unaligned_be64(u64 val, void *p) { *((__be64 *)p) = cpu_to_be64(val); } #endif /* _LINUX_UNALIGNED_ACCESS_OK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PID_H #define _LINUX_PID_H #include <linux/rculist.h> #include <linux/wait.h> #include <linux/refcount.h> enum pid_type { PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX, }; /* * What is struct pid? * * A struct pid is the kernel's internal notion of a process identifier. * It refers to individual tasks, process groups, and sessions. While * there are processes attached to it the struct pid lives in a hash * table, so it and then the processes that it refers to can be found * quickly from the numeric pid value. The attached processes may be * quickly accessed by following pointers from struct pid. * * Storing pid_t values in the kernel and referring to them later has a * problem. The process originally with that pid may have exited and the * pid allocator wrapped, and another process could have come along * and been assigned that pid. * * Referring to user space processes by holding a reference to struct * task_struct has a problem. When the user space process exits * the now useless task_struct is still kept. A task_struct plus a * stack consumes around 10K of low kernel memory. More precisely * this is THREAD_SIZE + sizeof(struct task_struct). By comparison * a struct pid is about 64 bytes. * * Holding a reference to struct pid solves both of these problems. * It is small so holding a reference does not consume a lot of * resources, and since a new struct pid is allocated when the numeric pid * value is reused (when pids wrap around) we don't mistakenly refer to new * processes. */ /* * struct upid is used to get the id of the struct pid, as it is * seen in particular namespace. Later the struct pid is found with * find_pid_ns() using the int nr and struct pid_namespace *ns. */ struct upid { int nr; struct pid_namespace *ns; }; struct pid { refcount_t count; unsigned int level; spinlock_t lock; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; /* wait queue for pidfd notifications */ wait_queue_head_t wait_pidfd; struct rcu_head rcu; struct upid numbers[1]; }; extern struct pid init_struct_pid; extern const struct file_operations pidfd_fops; struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); static inline struct pid *get_pid(struct pid *pid) { if (pid) refcount_inc(&pid->count); return pid; } extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); static inline bool pid_has_task(struct pid *pid, enum pid_type type) { return !hlist_empty(&pid->tasks[type]); } extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); /* * these helpers must be called with the tasklist_lock write-held. */ extern void attach_pid(struct task_struct *task, enum pid_type); extern void detach_pid(struct task_struct *task, enum pid_type); extern void change_pid(struct task_struct *task, enum pid_type, struct pid *pid); extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); struct pid_namespace; extern struct pid_namespace init_pid_ns; extern int pid_max; extern int pid_max_min, pid_max_max; /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. * * find_pid_ns() finds the pid in the namespace specified * find_vpid() finds the pid by its virtual id, i.e. in the current namespace * * see also find_task_by_vpid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size); extern void free_pid(struct pid *pid); extern void disable_pid_allocation(struct pid_namespace *ns); /* * ns_of_pid() returns the pid namespace in which the specified pid was * allocated. * * NOTE: * ns_of_pid() is expected to be called for a process (task) that has * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid * is expected to be non-NULL. If @pid is NULL, caller should handle * the resulting NULL pid-ns. */ static inline struct pid_namespace *ns_of_pid(struct pid *pid) { struct pid_namespace *ns = NULL; if (pid) ns = pid->numbers[pid->level].ns; return ns; } /* * is_child_reaper returns true if the pid is the init process * of the current namespace. As this one could be checked before * pid_ns->child_reaper is assigned in copy_process, we check * with the pid number. */ static inline bool is_child_reaper(struct pid *pid) { return pid->numbers[pid->level].nr == 1; } /* * the helpers to get the pid's id seen from different namespaces * * pid_nr() : global id, i.e. the id seen from the init namespace; * pid_vnr() : virtual id, i.e. the id seen from the pid namespace of * current. * pid_nr_ns() : id seen from the ns specified. * * see also task_xid_nr() etc in include/linux/sched.h */ static inline pid_t pid_nr(struct pid *pid) { pid_t nr = 0; if (pid) nr = pid->numbers[0].nr; return nr; } pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pid_links[type]) { /* * Both old and new leaders may be attached to * the same pid in the middle of de_thread(). */ #define while_each_pid_task(pid, type, task) \ if (type == PIDTYPE_PID) \ break; \ } \ } while (0) #define do_each_pid_thread(pid, type, task) \ do_each_pid_task(pid, type, task) { \ struct task_struct *tg___ = task; \ for_each_thread(tg___, task) { #define while_each_pid_thread(pid, type, task) \ } \ task = tg___; \ } while_each_pid_task(pid, type, task) #endif /* _LINUX_PID_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cgroup #if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_CGROUP_H #include <linux/cgroup.h> #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(cgroup_root, TP_PROTO(struct cgroup_root *root), TP_ARGS(root), TP_STRUCT__entry( __field( int, root ) __field( u16, ss_mask ) __string( name, root->name ) ), TP_fast_assign( __entry->root = root->hierarchy_id; __entry->ss_mask = root->subsys_mask; __assign_str(name, root->name); ), TP_printk("root=%d ss_mask=%#x name=%s", __entry->root, __entry->ss_mask, __get_str(name)) ); DEFINE_EVENT(cgroup_root, cgroup_setup_root, TP_PROTO(struct cgroup_root *root), TP_ARGS(root) ); DEFINE_EVENT(cgroup_root, cgroup_destroy_root, TP_PROTO(struct cgroup_root *root), TP_ARGS(root) ); DEFINE_EVENT(cgroup_root, cgroup_remount, TP_PROTO(struct cgroup_root *root), TP_ARGS(root) ); DECLARE_EVENT_CLASS(cgroup, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path), TP_STRUCT__entry( __field( int, root ) __field( int, id ) __field( int, level ) __string( path, path ) ), TP_fast_assign( __entry->root = cgrp->root->hierarchy_id; __entry->id = cgroup_id(cgrp); __entry->level = cgrp->level; __assign_str(path, path); ), TP_printk("root=%d id=%d level=%d path=%s", __entry->root, __entry->id, __entry->level, __get_str(path)) ); DEFINE_EVENT(cgroup, cgroup_mkdir, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DEFINE_EVENT(cgroup, cgroup_rmdir, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DEFINE_EVENT(cgroup, cgroup_release, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DEFINE_EVENT(cgroup, cgroup_rename, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DEFINE_EVENT(cgroup, cgroup_freeze, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DEFINE_EVENT(cgroup, cgroup_unfreeze, TP_PROTO(struct cgroup *cgrp, const char *path), TP_ARGS(cgrp, path) ); DECLARE_EVENT_CLASS(cgroup_migrate, TP_PROTO(struct cgroup *dst_cgrp, const char *path, struct task_struct *task, bool threadgroup), TP_ARGS(dst_cgrp, path, task, threadgroup), TP_STRUCT__entry( __field( int, dst_root ) __field( int, dst_id ) __field( int, dst_level ) __field( int, pid ) __string( dst_path, path ) __string( comm, task->comm ) ), TP_fast_assign( __entry->dst_root = dst_cgrp->root->hierarchy_id; __entry->dst_id = cgroup_id(dst_cgrp); __entry->dst_level = dst_cgrp->level; __assign_str(dst_path, path); __entry->pid = task->pid; __assign_str(comm, task->comm); ), TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", __entry->dst_root, __entry->dst_id, __entry->dst_level, __get_str(dst_path), __entry->pid, __get_str(comm)) ); DEFINE_EVENT(cgroup_migrate, cgroup_attach_task, TP_PROTO(struct cgroup *dst_cgrp, const char *path, struct task_struct *task, bool threadgroup), TP_ARGS(dst_cgrp, path, task, threadgroup) ); DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks, TP_PROTO(struct cgroup *dst_cgrp, const char *path, struct task_struct *task, bool threadgroup), TP_ARGS(dst_cgrp, path, task, threadgroup) ); DECLARE_EVENT_CLASS(cgroup_event, TP_PROTO(struct cgroup *cgrp, const char *path, int val), TP_ARGS(cgrp, path, val), TP_STRUCT__entry( __field( int, root ) __field( int, id ) __field( int, level ) __string( path, path ) __field( int, val ) ), TP_fast_assign( __entry->root = cgrp->root->hierarchy_id; __entry->id = cgroup_id(cgrp); __entry->level = cgrp->level; __assign_str(path, path); __entry->val = val; ), TP_printk("root=%d id=%d level=%d path=%s val=%d", __entry->root, __entry->id, __entry->level, __get_str(path), __entry->val) ); DEFINE_EVENT(cgroup_event, cgroup_notify_populated, TP_PROTO(struct cgroup *cgrp, const char *path, int val), TP_ARGS(cgrp, path, val) ); DEFINE_EVENT(cgroup_event, cgroup_notify_frozen, TP_PROTO(struct cgroup *cgrp, const char *path, int val), TP_ARGS(cgrp, path, val) ); #endif /* _TRACE_CGROUP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NetLabel System * * The NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 */ #ifndef _NETLABEL_H #define _NETLABEL_H #include <linux/types.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/in6.h> #include <net/netlink.h> #include <net/request_sock.h> #include <linux/refcount.h> struct cipso_v4_doi; struct calipso_doi; /* * NetLabel - A management interface for maintaining network packet label * mapping tables for explicit packet labling protocols. * * Network protocols such as CIPSO and RIPSO require a label translation layer * to convert the label on the packet into something meaningful on the host * machine. In the current Linux implementation these mapping tables live * inside the kernel; NetLabel provides a mechanism for user space applications * to manage these mapping tables. * * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to * send messages between kernel and user space. The general format of a * NetLabel message is shown below: * * +-----------------+-------------------+--------- --- -- - * | struct nlmsghdr | struct genlmsghdr | payload * +-----------------+-------------------+--------- --- -- - * * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal. * The payload is dependent on the subsystem specified in the * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c * file. All of the fields in the NetLabel payload are NETLINK attributes, see * the include/net/netlink.h file for more information on NETLINK attributes. * */ /* * NetLabel NETLINK protocol */ /* NetLabel NETLINK protocol version * 1: initial version * 2: added static labels for unlabeled connections * 3: network selectors added to the NetLabel/LSM domain mapping and the * CIPSO_V4_MAP_LOCAL CIPSO mapping was added */ #define NETLBL_PROTO_VERSION 3 /* NetLabel NETLINK types/families */ #define NETLBL_NLTYPE_NONE 0 #define NETLBL_NLTYPE_MGMT 1 #define NETLBL_NLTYPE_MGMT_NAME "NLBL_MGMT" #define NETLBL_NLTYPE_RIPSO 2 #define NETLBL_NLTYPE_RIPSO_NAME "NLBL_RIPSO" #define NETLBL_NLTYPE_CIPSOV4 3 #define NETLBL_NLTYPE_CIPSOV4_NAME "NLBL_CIPSOv4" #define NETLBL_NLTYPE_CIPSOV6 4 #define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" #define NETLBL_NLTYPE_UNLABELED 5 #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" #define NETLBL_NLTYPE_ADDRSELECT 6 #define NETLBL_NLTYPE_ADDRSELECT_NAME "NLBL_ADRSEL" #define NETLBL_NLTYPE_CALIPSO 7 #define NETLBL_NLTYPE_CALIPSO_NAME "NLBL_CALIPSO" /* * NetLabel - Kernel API for accessing the network packet label mappings. * * The following functions are provided for use by other kernel modules, * specifically kernel LSM modules, to provide a consistent, transparent API * for dealing with explicit packet labeling protocols such as CIPSO and * RIPSO. The functions defined here are implemented in the * net/netlabel/netlabel_kapi.c file. * */ /* NetLabel audit information */ struct netlbl_audit { u32 secid; kuid_t loginuid; unsigned int sessionid; }; /* * LSM security attributes */ /** * struct netlbl_lsm_cache - NetLabel LSM security attribute cache * @refcount: atomic reference counter * @free: LSM supplied function to free the cache data * @data: LSM supplied cache data * * Description: * This structure is provided for LSMs which wish to make use of the NetLabel * caching mechanism to store LSM specific data/attributes in the NetLabel * cache. If the LSM has to perform a lot of translation from the NetLabel * security attributes into it's own internal representation then the cache * mechanism can provide a way to eliminate some or all of that translation * overhead on a cache hit. * */ struct netlbl_lsm_cache { refcount_t refcount; void (*free) (const void *data); void *data; }; /** * struct netlbl_lsm_catmap - NetLabel LSM secattr category bitmap * @startbit: the value of the lowest order bit in the bitmap * @bitmap: the category bitmap * @next: pointer to the next bitmap "node" or NULL * * Description: * This structure is used to represent category bitmaps. Due to the large * number of categories supported by most labeling protocols it is not * practical to transfer a full bitmap internally so NetLabel adopts a sparse * bitmap structure modeled after SELinux's ebitmap structure. * The catmap bitmap field MUST be a power of two in length and large * enough to hold at least 240 bits. Special care (i.e. check the code!) * should be used when changing these values as the LSM implementation * probably has functions which rely on the sizes of these types to speed * processing. * */ #define NETLBL_CATMAP_MAPTYPE u64 #define NETLBL_CATMAP_MAPCNT 4 #define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8) #define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \ NETLBL_CATMAP_MAPCNT) #define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01 struct netlbl_lsm_catmap { u32 startbit; NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT]; struct netlbl_lsm_catmap *next; }; /** * struct netlbl_lsm_secattr - NetLabel LSM security attributes * @flags: indicate structure attributes, see NETLBL_SECATTR_* * @type: indicate the NLTYPE of the attributes * @domain: the NetLabel LSM domain * @cache: NetLabel LSM specific cache * @attr.mls: MLS sensitivity label * @attr.mls.cat: MLS category bitmap * @attr.mls.lvl: MLS sensitivity level * @attr.secid: LSM specific secid token * * Description: * This structure is used to pass security attributes between NetLabel and the * LSM modules. The flags field is used to specify which fields within the * struct are valid and valid values can be created by bitwise OR'ing the * NETLBL_SECATTR_* defines. The domain field is typically set by the LSM to * specify domain specific configuration settings and is not usually used by * NetLabel itself when returning security attributes to the LSM. * */ struct netlbl_lsm_secattr { u32 flags; /* bitmap values for 'flags' */ #define NETLBL_SECATTR_NONE 0x00000000 #define NETLBL_SECATTR_DOMAIN 0x00000001 #define NETLBL_SECATTR_DOMAIN_CPY (NETLBL_SECATTR_DOMAIN | \ NETLBL_SECATTR_FREE_DOMAIN) #define NETLBL_SECATTR_CACHE 0x00000002 #define NETLBL_SECATTR_MLS_LVL 0x00000004 #define NETLBL_SECATTR_MLS_CAT 0x00000008 #define NETLBL_SECATTR_SECID 0x00000010 /* bitmap meta-values for 'flags' */ #define NETLBL_SECATTR_FREE_DOMAIN 0x01000000 #define NETLBL_SECATTR_CACHEABLE (NETLBL_SECATTR_MLS_LVL | \ NETLBL_SECATTR_MLS_CAT | \ NETLBL_SECATTR_SECID) u32 type; char *domain; struct netlbl_lsm_cache *cache; struct { struct { struct netlbl_lsm_catmap *cat; u32 lvl; } mls; u32 secid; } attr; }; /** * struct netlbl_calipso_ops - NetLabel CALIPSO operations * @doi_add: add a CALIPSO DOI * @doi_free: free a CALIPSO DOI * @doi_getdef: returns a reference to a DOI * @doi_putdef: releases a reference of a DOI * @doi_walk: enumerate the DOI list * @sock_getattr: retrieve the socket's attr * @sock_setattr: set the socket's attr * @sock_delattr: remove the socket's attr * @req_setattr: set the req socket's attr * @req_delattr: remove the req socket's attr * @opt_getattr: retrieve attr from memory block * @skbuff_optptr: find option in packet * @skbuff_setattr: set the skbuff's attr * @skbuff_delattr: remove the skbuff's attr * @cache_invalidate: invalidate cache * @cache_add: add cache entry * * Description: * This structure is filled out by the CALIPSO engine and passed * to the NetLabel core via a call to netlbl_calipso_ops_register(). * It enables the CALIPSO engine (and hence IPv6) to be compiled * as a module. */ struct netlbl_calipso_ops { int (*doi_add)(struct calipso_doi *doi_def, struct netlbl_audit *audit_info); void (*doi_free)(struct calipso_doi *doi_def); int (*doi_remove)(u32 doi, struct netlbl_audit *audit_info); struct calipso_doi *(*doi_getdef)(u32 doi); void (*doi_putdef)(struct calipso_doi *doi_def); int (*doi_walk)(u32 *skip_cnt, int (*callback)(struct calipso_doi *doi_def, void *arg), void *cb_arg); int (*sock_getattr)(struct sock *sk, struct netlbl_lsm_secattr *secattr); int (*sock_setattr)(struct sock *sk, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr); void (*sock_delattr)(struct sock *sk); int (*req_setattr)(struct request_sock *req, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr); void (*req_delattr)(struct request_sock *req); int (*opt_getattr)(const unsigned char *calipso, struct netlbl_lsm_secattr *secattr); unsigned char *(*skbuff_optptr)(const struct sk_buff *skb); int (*skbuff_setattr)(struct sk_buff *skb, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr); int (*skbuff_delattr)(struct sk_buff *skb); void (*cache_invalidate)(void); int (*cache_add)(const unsigned char *calipso_ptr, const struct netlbl_lsm_secattr *secattr); }; /* * LSM security attribute operations (inline) */ /** * netlbl_secattr_cache_alloc - Allocate and initialize a secattr cache * @flags: the memory allocation flags * * Description: * Allocate and initialize a netlbl_lsm_cache structure. Returns a pointer * on success, NULL on failure. * */ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) { struct netlbl_lsm_cache *cache; cache = kzalloc(sizeof(*cache), flags); if (cache) refcount_set(&cache->refcount, 1); return cache; } /** * netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct * @cache: the struct to free * * Description: * Frees @secattr including all of the internal buffers. * */ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) { if (!refcount_dec_and_test(&cache->refcount)) return; if (cache->free) cache->free(cache->data); kfree(cache); } /** * netlbl_catmap_alloc - Allocate a LSM secattr catmap * @flags: memory allocation flags * * Description: * Allocate memory for a LSM secattr catmap, returns a pointer on success, NULL * on failure. * */ static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc(gfp_t flags) { return kzalloc(sizeof(struct netlbl_lsm_catmap), flags); } /** * netlbl_catmap_free - Free a LSM secattr catmap * @catmap: the category bitmap * * Description: * Free a LSM secattr catmap. * */ static inline void netlbl_catmap_free(struct netlbl_lsm_catmap *catmap) { struct netlbl_lsm_catmap *iter; while (catmap) { iter = catmap; catmap = catmap->next; kfree(iter); } } /** * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct * @secattr: the struct to initialize * * Description: * Initialize an already allocated netlbl_lsm_secattr struct. * */ static inline void netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) { memset(secattr, 0, sizeof(*secattr)); } /** * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct * @secattr: the struct to clear * * Description: * Destroys the @secattr struct, including freeing all of the internal buffers. * The struct must be reset with a call to netlbl_secattr_init() before reuse. * */ static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr) { if (secattr->flags & NETLBL_SECATTR_FREE_DOMAIN) kfree(secattr->domain); if (secattr->flags & NETLBL_SECATTR_CACHE) netlbl_secattr_cache_free(secattr->cache); if (secattr->flags & NETLBL_SECATTR_MLS_CAT) netlbl_catmap_free(secattr->attr.mls.cat); } /** * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct * @flags: the memory allocation flags * * Description: * Allocate and initialize a netlbl_lsm_secattr struct. Returns a valid * pointer on success, or NULL on failure. * */ static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(gfp_t flags) { return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); } /** * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct * @secattr: the struct to free * * Description: * Frees @secattr including all of the internal buffers. * */ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) { netlbl_secattr_destroy(secattr); kfree(secattr); } #ifdef CONFIG_NETLABEL /* * LSM configuration operations */ int netlbl_cfg_map_del(const char *domain, u16 family, const void *addr, const void *mask, struct netlbl_audit *audit_info); int netlbl_cfg_unlbl_map_add(const char *domain, u16 family, const void *addr, const void *mask, struct netlbl_audit *audit_info); int netlbl_cfg_unlbl_static_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u16 family, u32 secid, struct netlbl_audit *audit_info); int netlbl_cfg_unlbl_static_del(struct net *net, const char *dev_name, const void *addr, const void *mask, u16 family, struct netlbl_audit *audit_info); int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info); void netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info); int netlbl_cfg_cipsov4_map_add(u32 doi, const char *domain, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info); int netlbl_cfg_calipso_add(struct calipso_doi *doi_def, struct netlbl_audit *audit_info); void netlbl_cfg_calipso_del(u32 doi, struct netlbl_audit *audit_info); int netlbl_cfg_calipso_map_add(u32 doi, const char *domain, const struct in6_addr *addr, const struct in6_addr *mask, struct netlbl_audit *audit_info); /* * LSM security attribute operations */ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset); int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset); int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap, u32 *offset, unsigned long *bitmap); int netlbl_catmap_setbit(struct netlbl_lsm_catmap **catmap, u32 bit, gfp_t flags); int netlbl_catmap_setrng(struct netlbl_lsm_catmap **catmap, u32 start, u32 end, gfp_t flags); int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, u32 offset, unsigned long bitmap, gfp_t flags); /* Bitmap functions */ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, u32 offset, u8 state); void netlbl_bitmap_setbit(unsigned char *bitmap, u32 bit, u8 state); /* * LSM protocol operations (NetLabel LSM/kernel API) */ int netlbl_enabled(void); int netlbl_sock_setattr(struct sock *sk, u16 family, const struct netlbl_lsm_secattr *secattr); void netlbl_sock_delattr(struct sock *sk); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int netlbl_conn_setattr(struct sock *sk, struct sockaddr *addr, const struct netlbl_lsm_secattr *secattr); int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr); void netlbl_req_delattr(struct request_sock *req); int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); void netlbl_skbuff_err(struct sk_buff *skb, u16 family, int error, int gateway); /* * LSM label mapping cache operations */ void netlbl_cache_invalidate(void); int netlbl_cache_add(const struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); /* * Protocol engine operations */ struct audit_buffer *netlbl_audit_start(int type, struct netlbl_audit *audit_info); #else static inline int netlbl_cfg_map_del(const char *domain, u16 family, const void *addr, const void *mask, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_cfg_unlbl_map_add(const char *domain, u16 family, void *addr, void *mask, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_cfg_unlbl_static_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u16 family, u32 secid, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_cfg_unlbl_static_del(struct net *net, const char *dev_name, const void *addr, const void *mask, u16 family, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline void netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) { return; } static inline int netlbl_cfg_cipsov4_map_add(u32 doi, const char *domain, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_cfg_calipso_add(struct calipso_doi *doi_def, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline void netlbl_cfg_calipso_del(u32 doi, struct netlbl_audit *audit_info) { return; } static inline int netlbl_cfg_calipso_map_add(u32 doi, const char *domain, const struct in6_addr *addr, const struct in6_addr *mask, struct netlbl_audit *audit_info) { return -ENOSYS; } static inline int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) { return -ENOENT; } static inline int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset) { return -ENOENT; } static inline int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap, u32 *offset, unsigned long *bitmap) { return 0; } static inline int netlbl_catmap_setbit(struct netlbl_lsm_catmap **catmap, u32 bit, gfp_t flags) { return 0; } static inline int netlbl_catmap_setrng(struct netlbl_lsm_catmap **catmap, u32 start, u32 end, gfp_t flags) { return 0; } static inline int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, u32 offset, unsigned long bitmap, gfp_t flags) { return 0; } static inline int netlbl_enabled(void) { return 0; } static inline int netlbl_sock_setattr(struct sock *sk, u16 family, const struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline void netlbl_sock_delattr(struct sock *sk) { } static inline int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline int netlbl_conn_setattr(struct sock *sk, struct sockaddr *addr, const struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline void netlbl_req_delattr(struct request_sock *req) { return; } static inline int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } static inline void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { return; } static inline void netlbl_cache_invalidate(void) { return; } static inline int netlbl_cache_add(const struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) { return 0; } static inline struct audit_buffer *netlbl_audit_start(int type, struct netlbl_audit *audit_info) { return NULL; } #endif /* CONFIG_NETLABEL */ const struct netlbl_calipso_ops * netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops); #endif /* _NETLABEL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H #include <linux/genhd.h> struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; local_t in_flight[2]; }; /* * Macros to operate on percpu disk statistics: * * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should * be called between disk_stat_lock() and disk_stat_unlock(). * * part_stat_read() can be called at any time. */ #define part_stat_lock() preempt_disable() #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ (per_cpu_ptr((part)->dkstats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) static inline void part_stat_set_all(struct hd_struct *part, int value) { int i; for_each_possible_cpu(i) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ __this_cpu_add((part)->dkstats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ __part_stat_add(&part_to_disk((part))->part0, \ field, addnd); \ } while (0) #define part_stat_dec(gendiskp, field) \ part_stat_add(gendiskp, field, -1) #define part_stat_inc(gendiskp, field) \ part_stat_add(gendiskp, field, 1) #define part_stat_sub(gendiskp, field, subnd) \ part_stat_add(gendiskp, field, -subnd) #define part_stat_local_dec(gendiskp, field) \ local_dec(&(part_stat_get(gendiskp, field))) #define part_stat_local_inc(gendiskp, field) \ local_inc(&(part_stat_get(gendiskp, field))) #define part_stat_local_read(gendiskp, field) \ local_read(&(part_stat_get(gendiskp, field))) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) #endif /* _LINUX_PART_STAT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 /* * include/linux/ktime.h * * ktime_t - nanosecond-resolution time format. * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes and macros. * * Started by: Thomas Gleixner and Ingo Molnar * * Credits: * * Roman Zippel provided the ideas and primary code snippets of * the ktime_t union and further simplifications of the original * code. * * For licencing details see kernel-base/COPYING */ #ifndef _LINUX_KTIME_H #define _LINUX_KTIME_H #include <linux/time.h> #include <linux/jiffies.h> #include <asm/bug.h> /* Nanosecond scalar representation for kernel time values */ typedef s64 ktime_t; /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @secs: seconds to set * @nsecs: nanoseconds to set * * Return: The ktime_t representation of the value. */ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) { if (unlikely(secs >= KTIME_SEC_MAX)) return KTIME_MAX; return secs * NSEC_PER_SEC + (s64)nsecs; } /* Subtract two ktime_t variables. rem = lhs -rhs: */ #define ktime_sub(lhs, rhs) ((lhs) - (rhs)) /* Add two ktime_t variables. res = lhs + rhs: */ #define ktime_add(lhs, rhs) ((lhs) + (rhs)) /* * Same as ktime_add(), but avoids undefined behaviour on overflow; however, * this means that you must check the result for overflow yourself. */ #define ktime_add_unsafe(lhs, rhs) ((u64) (lhs) + (rhs)) /* * Add a ktime_t variable and a scalar nanosecond value. * res = kt + nsval: */ #define ktime_add_ns(kt, nsval) ((kt) + (nsval)) /* * Subtract a scalar nanosecod from a ktime_t variable * res = kt - nsval: */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { return kt; } /** * ktime_compare - Compares two ktime_t variables for less, greater or equal * @cmp1: comparable1 * @cmp2: comparable2 * * Return: ... * cmp1 < cmp2: return <0 * cmp1 == cmp2: return 0 * cmp1 > cmp2: return >0 */ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) { if (cmp1 < cmp2) return -1; if (cmp1 > cmp2) return 1; return 0; } /** * ktime_after - Compare if a ktime_t value is bigger than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened after cmp2. */ static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) > 0; } /** * ktime_before - Compare if a ktime_t value is smaller than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened before cmp2. */ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) < 0; } #if BITS_PER_LONG < 64 extern s64 __ktime_divns(const ktime_t kt, s64 div); static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * Negative divisors could cause an inf loop, * so bug out here. */ BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { s64 ns = kt; u64 tmp = ns < 0 ? -ns : ns; do_div(tmp, div); return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * 32-bit implementation cannot handle negative divisors, * so catch them on 64bit as well. */ WARN_ON(div < 0); return kt / div; } #endif static inline s64 ktime_to_us(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_USEC); } static inline s64 ktime_to_ms(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_MSEC); } static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_us(ktime_sub(later, earlier)); } static inline s64 ktime_ms_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_ms(ktime_sub(later, earlier)); } static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { return ktime_add_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_add_ms(const ktime_t kt, const u64 msec) { return ktime_add_ns(kt, msec * NSEC_PER_MSEC); } static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) { return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) { return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); } extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data * @kt: the ktime_t variable to convert * @ts: the timespec variable to store the result in * * Return: %true if there was a successful conversion, %false if kt was 0. */ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, struct timespec64 *ts) { if (kt) { *ts = ktime_to_timespec64(kt); return true; } else { return false; } } #include <vdso/ktime.h> static inline ktime_t ns_to_ktime(u64 ns) { return ns; } static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; } # include <linux/timekeeping.h> # include <linux/timekeeping32.h> #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1994 Linus Torvalds * * Pentium III FXSR, SSE support * General FPU state handling cleanups * Gareth Hughes <gareth@valinux.com>, May 2000 */ #include <asm/fpu/internal.h> #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> #include <asm/traps.h> #include <asm/irq_regs.h> #include <linux/hardirq.h> #include <linux/pkeys.h> #define CREATE_TRACE_POINTS #include <asm/trace/fpu.h> /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ union fpregs_state init_fpstate __read_mostly; /* * Track whether the kernel is using the FPU state * currently. * * This flag is used: * * - by IRQ context code to potentially use the FPU * if it's unused. * * - to debug kernel_fpu_begin()/end() correctness */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* * Track which context is using the FPU on the CPU: */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); static bool kernel_fpu_disabled(void) { return this_cpu_read(in_kernel_fpu); } static bool interrupted_kernel_fpu_idle(void) { return !kernel_fpu_disabled(); } /* * Were we in user mode (or vm86 mode) when we were * interrupted? * * Doing kernel_fpu_begin/end() is ok if we are running * in an interrupt context from user mode - we'll just * save the FPU state as required. */ static bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); return regs && user_mode(regs); } /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? * * It's always ok in process context (ie "not interrupt") * but it is sometimes ok even from an irq. */ bool irq_fpu_usable(void) { return !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle(); } EXPORT_SYMBOL(irq_fpu_usable); /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact and we can * keep registers active. * * The legacy FNSAVE instruction cleared all FPU state * unconditionally, so registers are essentially destroyed. * Modern FPU state can be kept in registers, if there are * no pending FP exceptions. */ int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { copy_xregs_to_kernel(&fpu->state.xsave); /* * AVX512 state is tracked here because its use is * known to slow the max clock speed of the core. */ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) fpu->avx512_timestamp = jiffies; return 1; } if (likely(use_fxsr())) { copy_fxregs_to_kernel(fpu); return 1; } /* * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to mark them inactive: */ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); return 0; } EXPORT_SYMBOL(copy_fpregs_to_fpstate); void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); if (!(current->flags & PF_KTHREAD) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); /* * Ignore return value -- we don't care if reg state * is clobbered. */ copy_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); /* Put sane initial values into the control registers. */ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); /* * Save the FPU state (mark it for reload if necessary): * * This only ever gets called for the current task. */ void fpu__save(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } } trace_x86_fpu_after_save(fpu); fpregs_unlock(); } /* * Legacy x87 fpstate state init: */ static inline void fpstate_init_fstate(struct fregs_state *fp) { fp->cwd = 0xffff037fu; fp->swd = 0xffff0000u; fp->twd = 0xffffffffu; fp->fos = 0xffff0000u; } void fpstate_init(union fpregs_state *state) { if (!static_cpu_has(X86_FEATURE_FPU)) { fpstate_init_soft(&state->soft); return; } memset(state, 0, fpu_kernel_xstate_size); if (static_cpu_has(X86_FEATURE_XSAVES)) fpstate_init_xstate(&state->xsave); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else fpstate_init_fstate(&state->fsave); } EXPORT_SYMBOL_GPL(fpstate_init); int fpu__copy(struct task_struct *dst, struct task_struct *src) { struct fpu *dst_fpu = &dst->thread.fpu; struct fpu *src_fpu = &src->thread.fpu; dst_fpu->last_cpu = -1; if (!static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != &current->thread.fpu); /* * Don't let 'init optimized' areas of the XSAVE area * leak into the child task: */ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * If the FPU registers are not current just memcpy() the state. * Otherwise save current FPU registers directly into the child's FPU * context, without any memory-to-memory copying. * * ( The function 'fails' in the FNSAVE case, which destroys * register contents so we have to load them back. ) */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); else if (!copy_fpregs_to_fpstate(dst_fpu)) copy_kernel_to_fpregs(&dst_fpu->state); fpregs_unlock(); set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); return 0; } /* * Activate the current task's in-memory FPU context, * if it has not been used before: */ static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); set_thread_flag(TIF_NEED_FPU_LOAD); fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); } /* * This function must be called before we read a task's fpstate. * * There's two cases where this gets called: * * - for the current task (when coredumping), in which case we have * to save the latest FPU registers into the fpstate, * * - or it's called for stopped tasks (ptrace), in which case the * registers were already saved by the context-switch code when * the task scheduled out. * * If the task has used the FPU before then save it. */ void fpu__prepare_read(struct fpu *fpu) { if (fpu == &current->thread.fpu) fpu__save(fpu); } /* * This function must be called before we write a task's fpstate. * * Invalidate any cached FPU registers. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we * didn't clear its cached status here then the cached in-registers * state pending on its former CPU could be restored, corrupting * the modifications. */ void fpu__prepare_write(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU * state in the fpstate buffer: */ WARN_ON_FPU(fpu == &current->thread.fpu); /* Invalidate any cached state: */ __fpu_invalidate_fpregs_state(fpu); } /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. * * This function can be used in cases where we know that * a state-restore is coming: either an explicit one, * or a reschedule. */ void fpu__drop(struct fpu *fpu) { preempt_disable(); if (fpu == &current->thread.fpu) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); fpregs_deactivate(fpu); } trace_x86_fpu_dropped(fpu); preempt_enable(); } /* * Clear FPU registers by setting them up from the init fpstate. * Caller must do fpregs_[un]lock() around it. */ static inline void copy_init_fpstate_to_fpregs(u64 features_mask) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, features_mask); else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); else copy_kernel_to_fregs(&init_fpstate.fsave); if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); } /* * Clear the FPU state back to init state. * * Called by sys_execve(), by the signal handler code and by various * error paths. */ static void fpu__clear(struct fpu *fpu, bool user_only) { WARN_ON_FPU(fpu != &current->thread.fpu); if (!static_cpu_has(X86_FEATURE_FPU)) { fpu__drop(fpu); fpu__initialize(fpu); return; } fpregs_lock(); if (user_only) { if (!fpregs_state_valid(fpu, smp_processor_id()) && xfeatures_mask_supervisor()) copy_kernel_to_xregs(&fpu->state.xsave, xfeatures_mask_supervisor()); copy_init_fpstate_to_fpregs(xfeatures_mask_user()); } else { copy_init_fpstate_to_fpregs(xfeatures_mask_all); } fpregs_mark_activate(); fpregs_unlock(); } void fpu__clear_user_states(struct fpu *fpu) { fpu__clear(fpu, true); } void fpu__clear_all(struct fpu *fpu) { fpu__clear(fpu, false); } /* * Load FPU context before returning to userspace. */ void switch_fpu_return(void) { if (!static_cpu_has(X86_FEATURE_FPU)) return; __fpregs_load_activate(); } EXPORT_SYMBOL_GPL(switch_fpu_return); #ifdef CONFIG_X86_DEBUG_FPU /* * If current FPU state according to its tracking (loaded FPU context on this * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is * loaded on return to userland. */ void fpregs_assert_state_consistent(void) { struct fpu *fpu = &current->thread.fpu; if (test_thread_flag(TIF_NEED_FPU_LOAD)) return; WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) { struct fpu *fpu = &current->thread.fpu; fpregs_activate(fpu); fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } EXPORT_SYMBOL_GPL(fpregs_mark_activate); /* * x87 math exception handling: */ int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; if (trap_nr == X86_TRAP_MF) { unsigned short cwd, swd; /* * (~cwd & swd) will mask out exceptions that are not set to unmasked * status. 0x3f is the exception bits in these regs, 0x200 is the * C1 reg you need in case of a stack fault, 0x040 is the stack * fault bit. We should only be taking one exception at a time, * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to * fully reproduce the context of the exception. */ if (boot_cpu_has(X86_FEATURE_FXSR)) { cwd = fpu->state.fxsave.cwd; swd = fpu->state.fxsave.swd; } else { cwd = (unsigned short)fpu->state.fsave.cwd; swd = (unsigned short)fpu->state.fsave.swd; } err = swd & ~cwd; } else { /* * The SIMD FPU exceptions are handled a little differently, as there * is only a single status/control register. Thus, to determine which * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) mxcsr = fpu->state.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } if (err & 0x001) { /* Invalid op */ /* * swd & 0x240 == 0x040: Stack Underflow * swd & 0x240 == 0x240: Stack Overflow * User must clear the SF bit (0x40) if set */ return FPE_FLTINV; } else if (err & 0x004) { /* Divide by Zero */ return FPE_FLTDIV; } else if (err & 0x008) { /* Overflow */ return FPE_FLTOVF; } else if (err & 0x012) { /* Denormal, Underflow */ return FPE_FLTUND; } else if (err & 0x020) { /* Precision */ return FPE_FLTRES; } /* * If we're using IRQ 13, or supposedly even some trap * X86_TRAP_MF implementations, it's possible * we get a spurious trap, which is not an error. */ return 0; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 #ifndef _LINUX_JHASH_H #define _LINUX_JHASH_H /* jhash.h: Jenkins hash support. * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * * lookup3.c, by Bob Jenkins, May 2006, Public Domain. * * These are functions for producing 32-bit hashes for hash table lookup. * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() * are externally useful functions. Routines to test the hash are included * if SELF_TEST is defined. You can use this free for any purpose. It's in * the public domain. It has no warranty. * * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@netfilter.org) * * I've modified Bob's hash to be useful in the Linux kernel, and * any bugs present are my fault. * Jozsef */ #include <linux/bitops.h> #include <linux/unaligned/packed_struct.h> /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ #define jhash_mask(n) (jhash_size(n)-1) /* __jhash_mix -- mix 3 32-bit values reversibly. */ #define __jhash_mix(a, b, c) \ { \ a -= c; a ^= rol32(c, 4); c += b; \ b -= a; b ^= rol32(a, 6); a += c; \ c -= b; c ^= rol32(b, 8); b += a; \ a -= c; a ^= rol32(c, 16); c += b; \ b -= a; b ^= rol32(a, 19); a += c; \ c -= b; c ^= rol32(b, 4); b += a; \ } /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ #define __jhash_final(a, b, c) \ { \ c ^= b; c -= rol32(b, 14); \ a ^= c; a -= rol32(c, 11); \ b ^= a; b -= rol32(a, 25); \ c ^= b; c -= rol32(b, 16); \ a ^= c; a -= rol32(c, 4); \ b ^= a; b -= rol32(a, 14); \ c ^= b; c -= rol32(b, 24); \ } /* An arbitrary initial parameter */ #define JHASH_INITVAL 0xdeadbeef /* jhash - hash an arbitrary key * @k: sequence of bytes as key * @length: the length of the key * @initval: the previous hash, or an arbitray value * * The generic version, hashes an arbitrary sequence of bytes. * No alignment or length assumptions are made about the input key. * * Returns the hash value of the key. The result depends on endianness. */ static inline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const u8 *k = key; /* Set up the internal state */ a = b = c = JHASH_INITVAL + length + initval; /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += __get_unaligned_cpu32(k); b += __get_unaligned_cpu32(k + 4); c += __get_unaligned_cpu32(k + 8); __jhash_mix(a, b, c); length -= 12; k += 12; } /* Last block: affect all 32 bits of (c) */ switch (length) { case 12: c += (u32)k[11]<<24; fallthrough; case 11: c += (u32)k[10]<<16; fallthrough; case 10: c += (u32)k[9]<<8; fallthrough; case 9: c += k[8]; fallthrough; case 8: b += (u32)k[7]<<24; fallthrough; case 7: b += (u32)k[6]<<16; fallthrough; case 6: b += (u32)k[5]<<8; fallthrough; case 5: b += k[4]; fallthrough; case 4: a += (u32)k[3]<<24; fallthrough; case 3: a += (u32)k[2]<<16; fallthrough; case 2: a += (u32)k[1]<<8; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); break; case 0: /* Nothing left to add */ break; } return c; } /* jhash2 - hash an array of u32's * @k: the key which must be an array of u32's * @length: the number of u32's in the key * @initval: the previous hash, or an arbitray value * * Returns the hash value of the key. */ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) { u32 a, b, c; /* Set up the internal state */ a = b = c = JHASH_INITVAL + (length<<2) + initval; /* Handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; __jhash_mix(a, b, c); length -= 3; k += 3; } /* Handle the last 3 u32's */ switch (length) { case 3: c += k[2]; fallthrough; case 2: b += k[1]; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); break; case 0: /* Nothing left to add */ break; } return c; } /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; c += initval; __jhash_final(a, b, c); return c; } static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); } static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X_TABLES_H #define _X_TABLES_H #include <linux/netdevice.h> #include <linux/static_key.h> #include <linux/netfilter.h> #include <uapi/linux/netfilter/x_tables.h> /* Test a struct->invflags and a boolean for inequality */ #define NF_INVF(ptr, flag, boolean) \ ((boolean) ^ !!((ptr)->invflags & (flag))) /** * struct xt_action_param - parameters for matches/targets * * @match: the match extension * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems */ struct xt_action_param { union { const struct xt_match *match; const struct xt_target *target; }; union { const void *matchinfo, *targinfo; }; const struct nf_hook_state *state; int fragoff; unsigned int thoff; bool hotdrop; }; static inline struct net *xt_net(const struct xt_action_param *par) { return par->state->net; } static inline struct net_device *xt_in(const struct xt_action_param *par) { return par->state->in; } static inline const char *xt_inname(const struct xt_action_param *par) { return par->state->in->name; } static inline struct net_device *xt_out(const struct xt_action_param *par) { return par->state->out; } static inline const char *xt_outname(const struct xt_action_param *par) { return par->state->out->name; } static inline unsigned int xt_hooknum(const struct xt_action_param *par) { return par->state->hook; } static inline u_int8_t xt_family(const struct xt_action_param *par) { return par->state->pf; } /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions * * @net: network namespace through which the check was invoked * @table: table the rule is tried to be inserted into * @entryinfo: the family-specific rule data * (struct ipt_ip, ip6t_ip, arpt_arp or (note) ebt_entry) * @match: struct xt_match through which this function was invoked * @matchinfo: per-match data * @hook_mask: via which hooks the new rule is reachable * Other fields as above. */ struct xt_mtchk_param { struct net *net; const char *table; const void *entryinfo; const struct xt_match *match; void *matchinfo; unsigned int hook_mask; u_int8_t family; bool nft_compat; }; /** * struct xt_mdtor_param - match destructor parameters * Fields as above. */ struct xt_mtdtor_param { struct net *net; const struct xt_match *match; void *matchinfo; u_int8_t family; }; /** * struct xt_tgchk_param - parameters for target extensions' * checkentry functions * * @entryinfo: the family-specific rule data * (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry) * * Other fields see above. */ struct xt_tgchk_param { struct net *net; const char *table; const void *entryinfo; const struct xt_target *target; void *targinfo; unsigned int hook_mask; u_int8_t family; bool nft_compat; }; /* Target destructor parameters */ struct xt_tgdtor_param { struct net *net; const struct xt_target *target; void *targinfo; u_int8_t family; }; struct xt_match { struct list_head list; const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, struct xt_action_param *); /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int matchsize; unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; }; /* Registration hooks for targets. */ struct xt_target { struct list_head list; const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, const struct xt_action_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int targetsize; unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; }; /* Furniture shopping... */ struct xt_table { struct list_head list; /* What hooks you will enter on */ unsigned int valid_hooks; /* Man behind the curtain... */ struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; u_int8_t af; /* address/protocol family */ int priority; /* hook order */ /* called when table is needed in the given netns */ int (*table_init)(struct net *net); /* A unique name... */ const char name[XT_TABLE_MAXNAMELEN]; }; #include <linux/netfilter_ipv4.h> /* The table itself */ struct xt_table_info { /* Size per table */ unsigned int size; /* Number of entries: FIXME. --RR */ unsigned int number; /* Initial number of entries. Needed for module usage count */ unsigned int initial_entries; /* Entry points and underflows */ unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS]; /* * Number of user chains. Since tables cannot have loops, at most * @stacksize jumps (number of user chains) can possibly be made. */ unsigned int stacksize; void ***jumpstack; unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); void xt_unregister_targets(struct xt_target *target, unsigned int n); int xt_register_match(struct xt_match *target); void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks); unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); int xt_check_proc_name(const char *name, unsigned int size); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u); int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, int usersize, int size, int aligned_size); void *xt_copy_counters(sockptr_t arg, unsigned int len, struct xt_counters_info *info); struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); void *xt_unregister_table(struct xt_table *table); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, const char *name); void xt_table_unlock(struct xt_table *t); int xt_proto_init(struct net *net, u_int8_t af); void xt_proto_fini(struct net *net, u_int8_t af); struct xt_table_info *xt_alloc_table_info(unsigned int size); void xt_free_table_info(struct xt_table_info *info); /** * xt_recseq - recursive seqcount for netfilter use * * Packet processing changes the seqcount only if no recursion happened * get_counters() can use read_seqcount_begin()/read_seqcount_retry(), * because we use the normal seqcount convention : * Low order bit set to 1 if a writer is active. */ DECLARE_PER_CPU(seqcount_t, xt_recseq); /* xt_tee_enabled - true if x_tables needs to handle reentrancy * * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. */ extern struct static_key xt_tee_enabled; /** * xt_write_recseq_begin - start of a write section * * Begin packet processing : all readers must wait the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) * Returns : * 1 if no recursion on this cpu * 0 if recursion detected */ static inline unsigned int xt_write_recseq_begin(void) { unsigned int addend; /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). */ addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1; /* * This is kind of a write_seqcount_begin(), but addend is 0 or 1 * We dont check addend value to avoid a test and conditional jump, * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); smp_mb(); return addend; } /** * xt_write_recseq_end - end of a write section * @addend: return value from previous xt_write_recseq_begin() * * End packet processing : all readers can proceed * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) */ static inline void xt_write_recseq_end(unsigned int addend) { /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); } /* * This helper is performance critical and must be inlined */ static inline unsigned long ifname_compare_aligned(const char *_a, const char *_b, const char *_mask) { const unsigned long *a = (const unsigned long *)_a; const unsigned long *b = (const unsigned long *)_b; const unsigned long *mask = (const unsigned long *)_mask; unsigned long ret; ret = (a[0] ^ b[0]) & mask[0]; if (IFNAMSIZ > sizeof(unsigned long)) ret |= (a[1] ^ b[1]) & mask[1]; if (IFNAMSIZ > 2 * sizeof(unsigned long)) ret |= (a[2] ^ b[2]) & mask[2]; if (IFNAMSIZ > 3 * sizeof(unsigned long)) ret |= (a[3] ^ b[3]) & mask[3]; BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); return ret; } struct xt_percpu_counter_alloc_state { unsigned int off; const char __percpu *mem; }; bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, struct xt_counters *counter); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); #ifdef CONFIG_COMPAT #include <net/compat.h> struct compat_xt_entry_match { union { struct { u_int16_t match_size; char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; struct { u_int16_t match_size; compat_uptr_t match; } kernel; u_int16_t match_size; } u; unsigned char data[]; }; struct compat_xt_entry_target { union { struct { u_int16_t target_size; char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; struct { u_int16_t target_size; compat_uptr_t target; } kernel; u_int16_t target_size; } u; unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks * need to change whole approach in order to calculate align as function of * current task alignment */ struct compat_xt_counters { compat_u64 pcnt, bcnt; /* Packet and byte counters */ }; struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; struct compat_xt_counters counters[]; }; struct _compat_xt_align { __u8 u8; __u16 u16; __u32 u32; compat_u64 u64; }; #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align)) void xt_compat_lock(u_int8_t af); void xt_compat_unlock(u_int8_t af); int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); void xt_compat_flush_offsets(u_int8_t af); int xt_compat_init_offsets(u8 af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); int xt_compat_target_offset(const struct xt_target *target); void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ /* * Task I/O accounting operations */ #ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED #define __TASK_IO_ACCOUNTING_OPS_INCLUDED #include <linux/sched.h> #ifdef CONFIG_TASK_IO_ACCOUNTING static inline void task_io_account_read(size_t bytes) { current->ioac.read_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return p->ioac.read_bytes >> 9; } static inline void task_io_account_write(size_t bytes) { current->ioac.write_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return p->ioac.write_bytes >> 9; } static inline void task_io_account_cancelled_write(size_t bytes) { current->ioac.cancelled_write_bytes += bytes; } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { memset(ioac, 0, sizeof(*ioac)); } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->read_bytes += src->read_bytes; dst->write_bytes += src->write_bytes; dst->cancelled_write_bytes += src->cancelled_write_bytes; } #else static inline void task_io_account_read(size_t bytes) { } static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return 0; } static inline void task_io_account_write(size_t bytes) { } static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return 0; } static inline void task_io_account_cancelled_write(size_t bytes) { } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_TASK_XACCT static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->rchar += src->rchar; dst->wchar += src->wchar; dst->syscr += src->syscr; dst->syscw += src->syscw; } #else static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_XACCT */ static inline void task_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { task_chr_io_accounting_add(dst, src); task_blk_io_accounting_add(dst, src); } #endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MBCACHE_H #define _LINUX_MBCACHE_H #include <linux/hash.h> #include <linux/list_bl.h> #include <linux/list.h> #include <linux/atomic.h> #include <linux/fs.h> struct mb_cache; struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct list_head e_list; /* Hash table list - protected by hash chain bitlock */ struct hlist_bl_node e_hash_list; atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; u32 e_referenced:1; u32 e_reusable:1; /* User provided value - stable during lifetime of the entry */ u64 e_value; }; struct mb_cache *mb_cache_create(int bucket_bits); void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, u64 value, bool reusable); void __mb_cache_entry_free(struct mb_cache_entry *entry); static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *entry) { if (!atomic_dec_and_test(&entry->e_refcnt)) return 0; __mb_cache_entry_free(entry); return 1; } void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, u32 key); struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, struct mb_cache_entry *entry); void mb_cache_entry_touch(struct mb_cache *cache, struct mb_cache_entry *entry); #endif /* _LINUX_MBCACHE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 /* * linux/include/video/vga.h -- standard VGA chipset interaction * * Copyright 1999 Jeff Garzik <jgarzik@pobox.com> * * Copyright history from vga16fb.c: * Copyright 1999 Ben Pfaff and Petr Vandrovec * Based on VGA info at http://www.osdever.net/FreeVGA/home.htm * Based on VESA framebuffer (c) 1998 Gerd Knorr * * This file is subject to the terms and conditions of the GNU General * Public License. See the file COPYING in the main directory of this * archive for more details. * */ #ifndef __linux_video_vga_h__ #define __linux_video_vga_h__ #include <linux/types.h> #include <linux/io.h> #include <asm/vga.h> #include <asm/byteorder.h> /* Some of the code below is taken from SVGAlib. The original, unmodified copyright notice for that code is below. */ /* VGAlib version 1.2 - (c) 1993 Tommy Frandsen */ /* */ /* This library is free software; you can redistribute it and/or */ /* modify it without any restrictions. This library is distributed */ /* in the hope that it will be useful, but without any warranty. */ /* Multi-chipset support Copyright 1993 Harm Hanemaayer */ /* partially copyrighted (C) 1993 by Hartmut Schirmer */ /* VGA data register ports */ #define VGA_CRT_DC 0x3D5 /* CRT Controller Data Register - color emulation */ #define VGA_CRT_DM 0x3B5 /* CRT Controller Data Register - mono emulation */ #define VGA_ATT_R 0x3C1 /* Attribute Controller Data Read Register */ #define VGA_ATT_W 0x3C0 /* Attribute Controller Data Write Register */ #define VGA_GFX_D 0x3CF /* Graphics Controller Data Register */ #define VGA_SEQ_D 0x3C5 /* Sequencer Data Register */ #define VGA_MIS_R 0x3CC /* Misc Output Read Register */ #define VGA_MIS_W 0x3C2 /* Misc Output Write Register */ #define VGA_FTC_R 0x3CA /* Feature Control Read Register */ #define VGA_IS1_RC 0x3DA /* Input Status Register 1 - color emulation */ #define VGA_IS1_RM 0x3BA /* Input Status Register 1 - mono emulation */ #define VGA_PEL_D 0x3C9 /* PEL Data Register */ #define VGA_PEL_MSK 0x3C6 /* PEL mask register */ /* EGA-specific registers */ #define EGA_GFX_E0 0x3CC /* Graphics enable processor 0 */ #define EGA_GFX_E1 0x3CA /* Graphics enable processor 1 */ /* VGA index register ports */ #define VGA_CRT_IC 0x3D4 /* CRT Controller Index - color emulation */ #define VGA_CRT_IM 0x3B4 /* CRT Controller Index - mono emulation */ #define VGA_ATT_IW 0x3C0 /* Attribute Controller Index & Data Write Register */ #define VGA_GFX_I 0x3CE /* Graphics Controller Index */ #define VGA_SEQ_I 0x3C4 /* Sequencer Index */ #define VGA_PEL_IW 0x3C8 /* PEL Write Index */ #define VGA_PEL_IR 0x3C7 /* PEL Read Index */ /* standard VGA indexes max counts */ #define VGA_CRT_C 0x19 /* Number of CRT Controller Registers */ #define VGA_ATT_C 0x15 /* Number of Attribute Controller Registers */ #define VGA_GFX_C 0x09 /* Number of Graphics Controller Registers */ #define VGA_SEQ_C 0x05 /* Number of Sequencer Registers */ #define VGA_MIS_C 0x01 /* Number of Misc Output Register */ /* VGA misc register bit masks */ #define VGA_MIS_COLOR 0x01 #define VGA_MIS_ENB_MEM_ACCESS 0x02 #define VGA_MIS_DCLK_28322_720 0x04 #define VGA_MIS_ENB_PLL_LOAD (0x04 | 0x08) #define VGA_MIS_SEL_HIGH_PAGE 0x20 /* VGA CRT controller register indices */ #define VGA_CRTC_H_TOTAL 0 #define VGA_CRTC_H_DISP 1 #define VGA_CRTC_H_BLANK_START 2 #define VGA_CRTC_H_BLANK_END 3 #define VGA_CRTC_H_SYNC_START 4 #define VGA_CRTC_H_SYNC_END 5 #define VGA_CRTC_V_TOTAL 6 #define VGA_CRTC_OVERFLOW 7 #define VGA_CRTC_PRESET_ROW 8 #define VGA_CRTC_MAX_SCAN 9 #define VGA_CRTC_CURSOR_START 0x0A #define VGA_CRTC_CURSOR_END 0x0B #define VGA_CRTC_START_HI 0x0C #define VGA_CRTC_START_LO 0x0D #define VGA_CRTC_CURSOR_HI 0x0E #define VGA_CRTC_CURSOR_LO 0x0F #define VGA_CRTC_V_SYNC_START 0x10 #define VGA_CRTC_V_SYNC_END 0x11 #define VGA_CRTC_V_DISP_END 0x12 #define VGA_CRTC_OFFSET 0x13 #define VGA_CRTC_UNDERLINE 0x14 #define VGA_CRTC_V_BLANK_START 0x15 #define VGA_CRTC_V_BLANK_END 0x16 #define VGA_CRTC_MODE 0x17 #define VGA_CRTC_LINE_COMPARE 0x18 #define VGA_CRTC_REGS VGA_CRT_C /* VGA CRT controller bit masks */ #define VGA_CR11_LOCK_CR0_CR7 0x80 /* lock writes to CR0 - CR7 */ #define VGA_CR17_H_V_SIGNALS_ENABLED 0x80 /* VGA attribute controller register indices */ #define VGA_ATC_PALETTE0 0x00 #define VGA_ATC_PALETTE1 0x01 #define VGA_ATC_PALETTE2 0x02 #define VGA_ATC_PALETTE3 0x03 #define VGA_ATC_PALETTE4 0x04 #define VGA_ATC_PALETTE5 0x05 #define VGA_ATC_PALETTE6 0x06 #define VGA_ATC_PALETTE7 0x07 #define VGA_ATC_PALETTE8 0x08 #define VGA_ATC_PALETTE9 0x09 #define VGA_ATC_PALETTEA 0x0A #define VGA_ATC_PALETTEB 0x0B #define VGA_ATC_PALETTEC 0x0C #define VGA_ATC_PALETTED 0x0D #define VGA_ATC_PALETTEE 0x0E #define VGA_ATC_PALETTEF 0x0F #define VGA_ATC_MODE 0x10 #define VGA_ATC_OVERSCAN 0x11 #define VGA_ATC_PLANE_ENABLE 0x12 #define VGA_ATC_PEL 0x13 #define VGA_ATC_COLOR_PAGE 0x14 #define VGA_AR_ENABLE_DISPLAY 0x20 /* VGA sequencer register indices */ #define VGA_SEQ_RESET 0x00 #define VGA_SEQ_CLOCK_MODE 0x01 #define VGA_SEQ_PLANE_WRITE 0x02 #define VGA_SEQ_CHARACTER_MAP 0x03 #define VGA_SEQ_MEMORY_MODE 0x04 /* VGA sequencer register bit masks */ #define VGA_SR01_CHAR_CLK_8DOTS 0x01 /* bit 0: character clocks 8 dots wide are generated */ #define VGA_SR01_SCREEN_OFF 0x20 /* bit 5: Screen is off */ #define VGA_SR02_ALL_PLANES 0x0F /* bits 3-0: enable access to all planes */ #define VGA_SR04_EXT_MEM 0x02 /* bit 1: allows complete mem access to 256K */ #define VGA_SR04_SEQ_MODE 0x04 /* bit 2: directs system to use a sequential addressing mode */ #define VGA_SR04_CHN_4M 0x08 /* bit 3: selects modulo 4 addressing for CPU access to display memory */ /* VGA graphics controller register indices */ #define VGA_GFX_SR_VALUE 0x00 #define VGA_GFX_SR_ENABLE 0x01 #define VGA_GFX_COMPARE_VALUE 0x02 #define VGA_GFX_DATA_ROTATE 0x03 #define VGA_GFX_PLANE_READ 0x04 #define VGA_GFX_MODE 0x05 #define VGA_GFX_MISC 0x06 #define VGA_GFX_COMPARE_MASK 0x07 #define VGA_GFX_BIT_MASK 0x08 /* VGA graphics controller bit masks */ #define VGA_GR06_GRAPHICS_MODE 0x01 /* macro for composing an 8-bit VGA register index and value * into a single 16-bit quantity */ #define VGA_OUT16VAL(v, r) (((v) << 8) | (r)) /* decide whether we should enable the faster 16-bit VGA register writes */ #ifdef __LITTLE_ENDIAN #define VGA_OUTW_WRITE #endif /* VGA State Save and Restore */ #define VGA_SAVE_FONT0 1 /* save/restore plane 2 fonts */ #define VGA_SAVE_FONT1 2 /* save/restore plane 3 fonts */ #define VGA_SAVE_TEXT 4 /* save/restore plane 0/1 fonts */ #define VGA_SAVE_FONTS 7 /* save/restore all fonts */ #define VGA_SAVE_MODE 8 /* save/restore video mode */ #define VGA_SAVE_CMAP 16 /* save/restore color map/DAC */ struct vgastate { void __iomem *vgabase; /* mmio base, if supported */ unsigned long membase; /* VGA window base, 0 for default - 0xA000 */ __u32 memsize; /* VGA window size, 0 for default 64K */ __u32 flags; /* what state[s] to save (see VGA_SAVE_*) */ __u32 depth; /* current fb depth, not important */ __u32 num_attr; /* number of att registers, 0 for default */ __u32 num_crtc; /* number of crt registers, 0 for default */ __u32 num_gfx; /* number of gfx registers, 0 for default */ __u32 num_seq; /* number of seq registers, 0 for default */ void *vidstate; }; extern int save_vga(struct vgastate *state); extern int restore_vga(struct vgastate *state); /* * generic VGA port read/write */ static inline unsigned char vga_io_r (unsigned short port) { return inb_p(port); } static inline void vga_io_w (unsigned short port, unsigned char val) { outb_p(val, port); } static inline void vga_io_w_fast (unsigned short port, unsigned char reg, unsigned char val) { outw(VGA_OUT16VAL (val, reg), port); } static inline unsigned char vga_mm_r (void __iomem *regbase, unsigned short port) { return readb (regbase + port); } static inline void vga_mm_w (void __iomem *regbase, unsigned short port, unsigned char val) { writeb (val, regbase + port); } static inline void vga_mm_w_fast (void __iomem *regbase, unsigned short port, unsigned char reg, unsigned char val) { writew (VGA_OUT16VAL (val, reg), regbase + port); } static inline unsigned char vga_r (void __iomem *regbase, unsigned short port) { if (regbase) return vga_mm_r (regbase, port); else return vga_io_r (port); } static inline void vga_w (void __iomem *regbase, unsigned short port, unsigned char val) { if (regbase) vga_mm_w (regbase, port, val); else vga_io_w (port, val); } static inline void vga_w_fast (void __iomem *regbase, unsigned short port, unsigned char reg, unsigned char val) { if (regbase) vga_mm_w_fast (regbase, port, reg, val); else vga_io_w_fast (port, reg, val); } /* * VGA CRTC register read/write */ static inline unsigned char vga_rcrt (void __iomem *regbase, unsigned char reg) { vga_w (regbase, VGA_CRT_IC, reg); return vga_r (regbase, VGA_CRT_DC); } static inline void vga_wcrt (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_w_fast (regbase, VGA_CRT_IC, reg, val); #else vga_w (regbase, VGA_CRT_IC, reg); vga_w (regbase, VGA_CRT_DC, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_io_rcrt (unsigned char reg) { vga_io_w (VGA_CRT_IC, reg); return vga_io_r (VGA_CRT_DC); } static inline void vga_io_wcrt (unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_io_w_fast (VGA_CRT_IC, reg, val); #else vga_io_w (VGA_CRT_IC, reg); vga_io_w (VGA_CRT_DC, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_mm_rcrt (void __iomem *regbase, unsigned char reg) { vga_mm_w (regbase, VGA_CRT_IC, reg); return vga_mm_r (regbase, VGA_CRT_DC); } static inline void vga_mm_wcrt (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_mm_w_fast (regbase, VGA_CRT_IC, reg, val); #else vga_mm_w (regbase, VGA_CRT_IC, reg); vga_mm_w (regbase, VGA_CRT_DC, val); #endif /* VGA_OUTW_WRITE */ } /* * VGA sequencer register read/write */ static inline unsigned char vga_rseq (void __iomem *regbase, unsigned char reg) { vga_w (regbase, VGA_SEQ_I, reg); return vga_r (regbase, VGA_SEQ_D); } static inline void vga_wseq (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_w_fast (regbase, VGA_SEQ_I, reg, val); #else vga_w (regbase, VGA_SEQ_I, reg); vga_w (regbase, VGA_SEQ_D, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_io_rseq (unsigned char reg) { vga_io_w (VGA_SEQ_I, reg); return vga_io_r (VGA_SEQ_D); } static inline void vga_io_wseq (unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_io_w_fast (VGA_SEQ_I, reg, val); #else vga_io_w (VGA_SEQ_I, reg); vga_io_w (VGA_SEQ_D, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_mm_rseq (void __iomem *regbase, unsigned char reg) { vga_mm_w (regbase, VGA_SEQ_I, reg); return vga_mm_r (regbase, VGA_SEQ_D); } static inline void vga_mm_wseq (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_mm_w_fast (regbase, VGA_SEQ_I, reg, val); #else vga_mm_w (regbase, VGA_SEQ_I, reg); vga_mm_w (regbase, VGA_SEQ_D, val); #endif /* VGA_OUTW_WRITE */ } /* * VGA graphics controller register read/write */ static inline unsigned char vga_rgfx (void __iomem *regbase, unsigned char reg) { vga_w (regbase, VGA_GFX_I, reg); return vga_r (regbase, VGA_GFX_D); } static inline void vga_wgfx (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_w_fast (regbase, VGA_GFX_I, reg, val); #else vga_w (regbase, VGA_GFX_I, reg); vga_w (regbase, VGA_GFX_D, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_io_rgfx (unsigned char reg) { vga_io_w (VGA_GFX_I, reg); return vga_io_r (VGA_GFX_D); } static inline void vga_io_wgfx (unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_io_w_fast (VGA_GFX_I, reg, val); #else vga_io_w (VGA_GFX_I, reg); vga_io_w (VGA_GFX_D, val); #endif /* VGA_OUTW_WRITE */ } static inline unsigned char vga_mm_rgfx (void __iomem *regbase, unsigned char reg) { vga_mm_w (regbase, VGA_GFX_I, reg); return vga_mm_r (regbase, VGA_GFX_D); } static inline void vga_mm_wgfx (void __iomem *regbase, unsigned char reg, unsigned char val) { #ifdef VGA_OUTW_WRITE vga_mm_w_fast (regbase, VGA_GFX_I, reg, val); #else vga_mm_w (regbase, VGA_GFX_I, reg); vga_mm_w (regbase, VGA_GFX_D, val); #endif /* VGA_OUTW_WRITE */ } /* * VGA attribute controller register read/write */ static inline unsigned char vga_rattr (void __iomem *regbase, unsigned char reg) { vga_w (regbase, VGA_ATT_IW, reg); return vga_r (regbase, VGA_ATT_R); } static inline void vga_wattr (void __iomem *regbase, unsigned char reg, unsigned char val) { vga_w (regbase, VGA_ATT_IW, reg); vga_w (regbase, VGA_ATT_W, val); } static inline unsigned char vga_io_rattr (unsigned char reg) { vga_io_w (VGA_ATT_IW, reg); return vga_io_r (VGA_ATT_R); } static inline void vga_io_wattr (unsigned char reg, unsigned char val) { vga_io_w (VGA_ATT_IW, reg); vga_io_w (VGA_ATT_W, val); } static inline unsigned char vga_mm_rattr (void __iomem *regbase, unsigned char reg) { vga_mm_w (regbase, VGA_ATT_IW, reg); return vga_mm_r (regbase, VGA_ATT_R); } static inline void vga_mm_wattr (void __iomem *regbase, unsigned char reg, unsigned char val) { vga_mm_w (regbase, VGA_ATT_IW, reg); vga_mm_w (regbase, VGA_ATT_W, val); } #endif /* __linux_video_vga_h__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 /* SPDX-License-Identifier: GPL-2.0-only */ /* * A policy database (policydb) specifies the * configuration data for the security policy. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ /* * Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com> * * Support for enhanced MLS infrastructure. * * Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com> * * Added conditional policy language extensions * * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * Copyright (C) 2003 - 2004 Tresys Technology, LLC */ #ifndef _SS_POLICYDB_H_ #define _SS_POLICYDB_H_ #include "symtab.h" #include "avtab.h" #include "sidtab.h" #include "ebitmap.h" #include "mls_types.h" #include "context.h" #include "constraint.h" /* * A datum type is defined for each kind of symbol * in the configuration data: individual permissions, * common prefixes for access vectors, classes, * users, roles, types, sensitivities, categories, etc. */ /* Permission attributes */ struct perm_datum { u32 value; /* permission bit + 1 */ }; /* Attributes of a common prefix for access vectors */ struct common_datum { u32 value; /* internal common value */ struct symtab permissions; /* common permissions */ }; /* Class attributes */ struct class_datum { u32 value; /* class value */ char *comkey; /* common name */ struct common_datum *comdatum; /* common datum */ struct symtab permissions; /* class-specific permission symbol table */ struct constraint_node *constraints; /* constraints on class permissions */ struct constraint_node *validatetrans; /* special transition rules */ /* Options how a new object user, role, and type should be decided */ #define DEFAULT_SOURCE 1 #define DEFAULT_TARGET 2 char default_user; char default_role; char default_type; /* Options how a new object range should be decided */ #define DEFAULT_SOURCE_LOW 1 #define DEFAULT_SOURCE_HIGH 2 #define DEFAULT_SOURCE_LOW_HIGH 3 #define DEFAULT_TARGET_LOW 4 #define DEFAULT_TARGET_HIGH 5 #define DEFAULT_TARGET_LOW_HIGH 6 #define DEFAULT_GLBLUB 7 char default_range; }; /* Role attributes */ struct role_datum { u32 value; /* internal role value */ u32 bounds; /* boundary of role */ struct ebitmap dominates; /* set of roles dominated by this role */ struct ebitmap types; /* set of authorized types for role */ }; struct role_trans_key { u32 role; /* current role */ u32 type; /* program executable type, or new object type */ u32 tclass; /* process class, or new object class */ }; struct role_trans_datum { u32 new_role; /* new role */ }; struct filename_trans_key { u32 ttype; /* parent dir context */ u16 tclass; /* class of new object */ const char *name; /* last path component */ }; struct filename_trans_datum { struct ebitmap stypes; /* bitmap of source types for this otype */ u32 otype; /* resulting type of new object */ struct filename_trans_datum *next; /* record for next otype*/ }; struct role_allow { u32 role; /* current role */ u32 new_role; /* new role */ struct role_allow *next; }; /* Type attributes */ struct type_datum { u32 value; /* internal type value */ u32 bounds; /* boundary of type */ unsigned char primary; /* primary name? */ unsigned char attribute;/* attribute ?*/ }; /* User attributes */ struct user_datum { u32 value; /* internal user value */ u32 bounds; /* bounds of user */ struct ebitmap roles; /* set of authorized roles for user */ struct mls_range range; /* MLS range (min - max) for user */ struct mls_level dfltlevel; /* default login MLS level for user */ }; /* Sensitivity attributes */ struct level_datum { struct mls_level *level; /* sensitivity and associated categories */ unsigned char isalias; /* is this sensitivity an alias for another? */ }; /* Category attributes */ struct cat_datum { u32 value; /* internal category bit + 1 */ unsigned char isalias; /* is this category an alias for another? */ }; struct range_trans { u32 source_type; u32 target_type; u32 target_class; }; /* Boolean data type */ struct cond_bool_datum { __u32 value; /* internal type value */ int state; }; struct cond_node; /* * type set preserves data needed to determine constraint info from * policy source. This is not used by the kernel policy but allows * utilities such as audit2allow to determine constraint denials. */ struct type_set { struct ebitmap types; struct ebitmap negset; u32 flags; }; /* * The configuration data includes security contexts for * initial SIDs, unlabeled file systems, TCP and UDP port numbers, * network interfaces, and nodes. This structure stores the * relevant data for one such entry. Entries of the same kind * (e.g. all initial SIDs) are linked together into a list. */ struct ocontext { union { char *name; /* name of initial SID, fs, netif, fstype, path */ struct { u8 protocol; u16 low_port; u16 high_port; } port; /* TCP or UDP port information */ struct { u32 addr; u32 mask; } node; /* node information */ struct { u32 addr[4]; u32 mask[4]; } node6; /* IPv6 node information */ struct { u64 subnet_prefix; u16 low_pkey; u16 high_pkey; } ibpkey; struct { char *dev_name; u8 port; } ibendport; } u; union { u32 sclass; /* security class for genfs */ u32 behavior; /* labeling behavior for fs_use */ } v; struct context context[2]; /* security context(s) */ u32 sid[2]; /* SID(s) */ struct ocontext *next; }; struct genfs { char *fstype; struct ocontext *head; struct genfs *next; }; /* symbol table array indices */ #define SYM_COMMONS 0 #define SYM_CLASSES 1 #define SYM_ROLES 2 #define SYM_TYPES 3 #define SYM_USERS 4 #define SYM_BOOLS 5 #define SYM_LEVELS 6 #define SYM_CATS 7 #define SYM_NUM 8 /* object context array indices */ #define OCON_ISID 0 /* initial SIDs */ #define OCON_FS 1 /* unlabeled file systems */ #define OCON_PORT 2 /* TCP and UDP port numbers */ #define OCON_NETIF 3 /* network interfaces */ #define OCON_NODE 4 /* nodes */ #define OCON_FSUSE 5 /* fs_use */ #define OCON_NODE6 6 /* IPv6 nodes */ #define OCON_IBPKEY 7 /* Infiniband PKeys */ #define OCON_IBENDPORT 8 /* Infiniband end ports */ #define OCON_NUM 9 /* The policy database */ struct policydb { int mls_enabled; /* symbol tables */ struct symtab symtab[SYM_NUM]; #define p_commons symtab[SYM_COMMONS] #define p_classes symtab[SYM_CLASSES] #define p_roles symtab[SYM_ROLES] #define p_types symtab[SYM_TYPES] #define p_users symtab[SYM_USERS] #define p_bools symtab[SYM_BOOLS] #define p_levels symtab[SYM_LEVELS] #define p_cats symtab[SYM_CATS] /* symbol names indexed by (value - 1) */ char **sym_val_to_name[SYM_NUM]; /* class, role, and user attributes indexed by (value - 1) */ struct class_datum **class_val_to_struct; struct role_datum **role_val_to_struct; struct user_datum **user_val_to_struct; struct type_datum **type_val_to_struct; /* type enforcement access vectors and transitions */ struct avtab te_avtab; /* role transitions */ struct hashtab role_tr; /* file transitions with the last path component */ /* quickly exclude lookups when parent ttype has no rules */ struct ebitmap filename_trans_ttypes; /* actual set of filename_trans rules */ struct hashtab filename_trans; /* only used if policyvers < POLICYDB_VERSION_COMP_FTRANS */ u32 compat_filename_trans_count; /* bools indexed by (value - 1) */ struct cond_bool_datum **bool_val_to_struct; /* type enforcement conditional access vectors and transitions */ struct avtab te_cond_avtab; /* array indexing te_cond_avtab by conditional */ struct cond_node *cond_list; u32 cond_list_len; /* role allows */ struct role_allow *role_allow; /* security contexts of initial SIDs, unlabeled file systems, TCP or UDP port numbers, network interfaces and nodes */ struct ocontext *ocontexts[OCON_NUM]; /* security contexts for files in filesystems that cannot support a persistent label mapping or use another fixed labeling behavior. */ struct genfs *genfs; /* range transitions table (range_trans_key -> mls_range) */ struct hashtab range_tr; /* type -> attribute reverse mapping */ struct ebitmap *type_attr_map_array; struct ebitmap policycaps; struct ebitmap permissive_map; /* length of this policy when it was loaded */ size_t len; unsigned int policyvers; unsigned int reject_unknown : 1; unsigned int allow_unknown : 1; u16 process_class; u32 process_trans_perms; } __randomize_layout; extern void policydb_destroy(struct policydb *p); extern int policydb_load_isids(struct policydb *p, struct sidtab *s); extern int policydb_context_isvalid(struct policydb *p, struct context *c); extern int policydb_class_isvalid(struct policydb *p, unsigned int class); extern int policydb_type_isvalid(struct policydb *p, unsigned int type); extern int policydb_role_isvalid(struct policydb *p, unsigned int role); extern int policydb_read(struct policydb *p, void *fp); extern int policydb_write(struct policydb *p, void *fp); extern struct filename_trans_datum *policydb_filenametr_search( struct policydb *p, struct filename_trans_key *key); extern struct mls_range *policydb_rangetr_search( struct policydb *p, struct range_trans *key); extern struct role_trans_datum *policydb_roletr_search( struct policydb *p, struct role_trans_key *key); #define POLICYDB_CONFIG_MLS 1 /* the config flags related to unknown classes/perms are bits 2 and 3 */ #define REJECT_UNKNOWN 0x00000002 #define ALLOW_UNKNOWN 0x00000004 #define OBJECT_R "object_r" #define OBJECT_R_VAL 1 #define POLICYDB_MAGIC SELINUX_MAGIC #define POLICYDB_STRING "SE Linux" struct policy_file { char *data; size_t len; }; struct policy_data { struct policydb *p; void *fp; }; static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) { if (bytes > fp->len) return -EINVAL; memcpy(buf, fp->data, bytes); fp->data += bytes; fp->len -= bytes; return 0; } static inline int put_entry(const void *buf, size_t bytes, int num, struct policy_file *fp) { size_t len = bytes * num; memcpy(fp->data, buf, len); fp->data += len; fp->len -= len; return 0; } static inline char *sym_name(struct policydb *p, unsigned int sym_num, unsigned int element_nr) { return p->sym_val_to_name[sym_num][element_nr]; } extern u16 string_to_security_class(struct policydb *p, const char *name); extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); #endif /* _SS_POLICYDB_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/xattr.h Extended attributes handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #ifndef _LINUX_XATTR_H #define _LINUX_XATTR_H #include <linux/slab.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <uapi/linux/xattr.h> struct inode; struct dentry; /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and * with a non-empty suffix. */ struct xattr_handler { const char *name; const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; const char *xattr_full_name(const struct xattr_handler *, const char *); struct xattr { const char *name; void *value; size_t value_len; }; ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct dentry *, const char *); int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **); int vfs_removexattr(struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); int xattr_supported_namespace(struct inode *inode, const char *prefix); static inline const char *xattr_prefix(const struct xattr_handler *handler) { return handler->prefix ?: handler->name; } struct simple_xattrs { struct list_head head; spinlock_t lock; }; struct simple_xattr { struct list_head list; char *name; size_t size; char value[]; }; /* * initialize the simple_xattrs structure */ static inline void simple_xattrs_init(struct simple_xattrs *xattrs) { INIT_LIST_HEAD(&xattrs->head); spin_lock_init(&xattrs->lock); } /* * free all the xattrs */ static inline void simple_xattrs_free(struct simple_xattrs *xattrs) { struct simple_xattr *xattr, *node; list_for_each_entry_safe(xattr, node, &xattrs->head, list) { kfree(xattr->name); kvfree(xattr); } } struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags, ssize_t *removed_size); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_list_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); #endif /* _LINUX_XATTR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_H #define BLK_MQ_H #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/srcu.h> struct blk_mq_tags; struct blk_flush_queue; /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device */ struct blk_mq_hw_ctx { struct { /** @lock: Protects the dispatch list. */ spinlock_t lock; /** * @dispatch: Used for requests that are ready to be * dispatched to the hardware but for some reason (e.g. lack of * resources) could not be sent to the hardware. As soon as the * driver can send new requests, requests at this list will * be sent first for a fairer dispatch. */ struct list_head dispatch; /** * @state: BLK_MQ_S_* flags. Defines the state of the hw * queue (active, scheduled to restart, stopped). */ unsigned long state; } ____cacheline_aligned_in_smp; /** * @run_work: Used for scheduling a hardware queue run at a later time. */ struct delayed_work run_work; /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; /** * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU * selection from @cpumask. */ int next_cpu; /** * @next_cpu_batch: Counter of how many works left in the batch before * changing to the next CPU. */ int next_cpu_batch; /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ unsigned long flags; /** * @sched_data: Pointer owned by the IO scheduler attached to a request * queue. It's up to the IO scheduler how to use this pointer. */ void *sched_data; /** * @queue: Pointer to the request queue that owns this hardware context. */ struct request_queue *queue; /** @fq: Queue of requests that need to perform a flush operation. */ struct blk_flush_queue *fq; /** * @driver_data: Pointer to data owned by the block driver that created * this hctx */ void *driver_data; /** * @ctx_map: Bitmap for each software queue. If bit is on, there is a * pending request in that software queue. */ struct sbitmap ctx_map; /** * @dispatch_from: Software queue to be used when no scheduler was * selected. */ struct blk_mq_ctx *dispatch_from; /** * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to * decide if the hw_queue is busy using Exponential Weighted Moving * Average algorithm. */ unsigned int dispatch_busy; /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; /** * @dispatch_wait: Waitqueue to put requests when there is no tag * available at the moment, to wait for another try in the future. */ wait_queue_entry_t dispatch_wait; /** * @wait_index: Index of next available dispatch_wait queue to insert * requests. */ atomic_t wait_index; /** * @tags: Tags owned by the block driver. A tag at this set is only * assigned when a request is dispatched from a hardware queue. */ struct blk_mq_tags *tags; /** * @sched_tags: Tags owned by I/O scheduler. If there is an I/O * scheduler associated with a request queue, a tag is assigned when * that request is allocated. Else, this member is not used. */ struct blk_mq_tags *sched_tags; /** @queued: Number of queued requests. */ unsigned long queued; /** @run: Number of dispatched requests. */ unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 /** @dispatched: Number of dispatch requests by queue. */ unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; /** * @nr_active: Number of active requests. Only used when a tag set is * shared across request queues. */ atomic_t nr_active; /** * @elevator_queued: Number of queued requests on hctx. */ atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; /** @kobj: Kernel object for sysfs. */ struct kobject kobj; /** @poll_considered: Count times blk_poll() was called. */ unsigned long poll_considered; /** @poll_invoked: Count how many requests blk_poll() polled. */ unsigned long poll_invoked; /** @poll_success: Count how many polled requests were completed. */ unsigned long poll_success; #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named * as cpu<cpu_number>. */ struct dentry *debugfs_dir; /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif /** * @hctx_list: if this hctx is not in use, this is an entry in * q->unused_hctx_list. */ struct list_head hctx_list; /** * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also * blk_mq_hw_ctx_size(). */ struct srcu_struct srcu[]; }; /** * struct blk_mq_queue_map - Map software queues to hardware queues * @mq_map: CPU ID to hardware queue index map. This is an array * with nr_cpu_ids elements. Each element has a value in the range * [@queue_offset, @queue_offset + @nr_queues). * @nr_queues: Number of hardware queues to map CPU IDs onto. * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe * driver to map each hardware queue type (enum hctx_type) onto a distinct * set of hardware queues. */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; unsigned int queue_offset; }; /** * enum hctx_type - Type of hardware queue * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. * @HCTX_TYPE_READ: Just for READ I/O. * @HCTX_TYPE_POLL: Polled I/O of any kind. * @HCTX_MAX_TYPES: Number of types of hctx. */ enum hctx_type { HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; /** * struct blk_mq_tag_set - tag set that can be shared between request queues * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the * same size, and it's perfectly legal to share maps between * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag * allocations. * @cmd_size: Number of additional bytes to allocate per request. The block * driver owns these additional bytes. * @numa_node: NUMA node the storage adapter has been connected to. * @timeout: Request processing timeout in jiffies. * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. * @active_queues_shared_sbitmap: * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. */ struct blk_mq_tag_set { struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; unsigned int cmd_size; int numa_node; unsigned int timeout; unsigned int flags; void *driver_data; atomic_t active_queues_shared_sbitmap; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; struct mutex tag_list_lock; struct list_head tag_list; }; /** * struct blk_mq_queue_data - Data about a request inserted in a queue * * @rq: Request pointer. * @last: If it is the last request in the queue. */ struct blk_mq_queue_data { struct request *rq; bool last; }; typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** * struct blk_mq_ops - Callback functions that implements block driver * behaviour. */ struct blk_mq_ops { /** * @queue_rq: Queue a new request from block IO. */ blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); /** * @commit_rqs: If a driver uses bd->last to judge when to submit * requests to hardware, it must define this function. In case of errors * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ void (*commit_rqs)(struct blk_mq_hw_ctx *); /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ bool (*get_budget)(struct request_queue *); /** * @put_budget: Release the reserved budget. */ void (*put_budget)(struct request_queue *); /** * @timeout: Called on request timeout. */ enum blk_eh_timer_return (*timeout)(struct request *, bool); /** * @poll: Called to poll for completion of a specific tag. */ int (*poll)(struct blk_mq_hw_ctx *); /** * @complete: Mark the request as complete. */ void (*complete)(struct request *); /** * @init_hctx: Called when the block layer side of a hardware queue has * been set up, allowing the driver to allocate/init matching * structures. */ int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); /** * @exit_hctx: Ditto for exit/teardown. */ void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); /** * @init_request: Called for every command allocated by the block layer * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. */ int (*init_request)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); /** * @exit_request: Ditto for exit/teardown. */ void (*exit_request)(struct blk_mq_tag_set *set, struct request *, unsigned int); /** * @initialize_rq_fn: Called from inside blk_get_request(). */ void (*initialize_rq_fn)(struct request *rq); /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ void (*cleanup_rq)(struct request *); /** * @busy: If set, returns whether or not this queue currently is busy. */ bool (*busy)(struct request_queue *); /** * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ int (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); #endif }; enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, /* hw queue is inactive after all its CPUs become offline */ BLK_MQ_S_INACTIVE = 3, BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_CPU_WORK_BATCH = 8, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) #define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init); struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, unsigned int set_flags); void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_queue_inflight(struct request_queue *q); enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, }; u32 blk_mq_unique_tag(struct request *rq); static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) { return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; } static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) { return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } /** * blk_mq_rq_state() - read the current MQ_RQ_* state of a request * @rq: target request. */ static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) { return READ_ONCE(rq->state); } static inline int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; } static inline int blk_mq_request_completed(struct request *rq) { return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; } void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); bool __blk_should_fake_timeout(struct request_queue *q); static inline bool blk_should_fake_timeout(struct request_queue *q) { if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) return __blk_should_fake_timeout(q); return false; } /** * blk_mq_rq_from_pdu - cast a PDU to a request * @pdu: the PDU (Protocol Data Unit) to be casted * * Return: request * * Driver command data is immediately after the request. So subtract request * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } /** * blk_mq_rq_to_pdu - cast a request to a PDU * @rq: the request to be casted * * Return: pointer to the PDU * * Driver command data is immediately after the request. So add request to get * the PDU. */ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ for ((i) = 0; (i) < (q)->nr_hw_queues && \ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) { if (rq->tag != -1) return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | BLK_QC_T_INTERNAL; } static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) rq->q->mq_ops->cleanup_rq(rq); } blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_LOCAL_H #define _ASM_X86_LOCAL_H #include <linux/percpu.h> #include <linux/atomic.h> #include <asm/asm.h> typedef struct { atomic_long_t a; } local_t; #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } #define local_read(l) atomic_long_read(&(l)->a) #define local_set(l, i) atomic_long_set(&(l)->a, (i)) static inline void local_inc(local_t *l) { asm volatile(_ASM_INC "%0" : "+m" (l->a.counter)); } static inline void local_dec(local_t *l) { asm volatile(_ASM_DEC "%0" : "+m" (l->a.counter)); } static inline void local_add(long i, local_t *l) { asm volatile(_ASM_ADD "%1,%0" : "+m" (l->a.counter) : "ir" (i)); } static inline void local_sub(long i, local_t *l) { asm volatile(_ASM_SUB "%1,%0" : "+m" (l->a.counter) : "ir" (i)); } /** * local_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @l: pointer to type local_t * * Atomically subtracts @i from @l and returns * true if the result is zero, or false for all * other cases. */ static inline bool local_sub_and_test(long i, local_t *l) { return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i); } /** * local_dec_and_test - decrement and test * @l: pointer to type local_t * * Atomically decrements @l by 1 and * returns true if the result is 0, or false for all other * cases. */ static inline bool local_dec_and_test(local_t *l) { return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e); } /** * local_inc_and_test - increment and test * @l: pointer to type local_t * * Atomically increments @l by 1 * and returns true if the result is zero, or false for all * other cases. */ static inline bool local_inc_and_test(local_t *l) { return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e); } /** * local_add_negative - add and test if negative * @i: integer value to add * @l: pointer to type local_t * * Atomically adds @i to @l and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static inline bool local_add_negative(long i, local_t *l) { return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i); } /** * local_add_return - add and return * @i: integer value to add * @l: pointer to type local_t * * Atomically adds @i to @l and returns @i + @l */ static inline long local_add_return(long i, local_t *l) { long __i = i; asm volatile(_ASM_XADD "%0, %1;" : "+r" (i), "+m" (l->a.counter) : : "memory"); return i + __i; } static inline long local_sub_return(long i, local_t *l) { return local_add_return(-i, l); } #define local_inc_return(l) (local_add_return(1, l)) #define local_dec_return(l) (local_sub_return(1, l)) #define local_cmpxchg(l, o, n) \ (cmpxchg_local(&((l)->a.counter), (o), (n))) /* Always has a lock prefix */ #define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) /** * local_add_unless - add unless the number is a given value * @l: pointer of type local_t * @a: the amount to add to l... * @u: ...unless l is equal to u. * * Atomically adds @a to @l, so long as it was not @u. * Returns non-zero if @l was not @u, and zero otherwise. */ #define local_add_unless(l, a, u) \ ({ \ long c, old; \ c = local_read((l)); \ for (;;) { \ if (unlikely(c == (u))) \ break; \ old = local_cmpxchg((l), c, c + (a)); \ if (likely(old == c)) \ break; \ c = old; \ } \ c != (u); \ }) #define local_inc_not_zero(l) local_add_unless((l), 1, 0) /* On x86_32, these are no better than the atomic variants. * On x86-64 these are better than the atomic variants on SMP kernels * because they dont use a lock prefix. */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) #define __local_add(i, l) local_add((i), (l)) #define __local_sub(i, l) local_sub((i), (l)) #endif /* _ASM_X86_LOCAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 // SPDX-License-Identifier: GPL-2.0 /* * hrtimers - High-resolution kernel timers * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes * * Started by: Thomas Gleixner and Ingo Molnar */ #ifndef _LINUX_HRTIMER_H #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/list.h> #include <linux/percpu.h> #include <linux/seqlock.h> #include <linux/timer.h> #include <linux/timerqueue.h> struct hrtimer_clock_base; struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: * * HRTIMER_MODE_ABS - Time value is absolute * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) * HRTIMER_MODE_SOFT - Timer callback function will be executed in * soft irq context * HRTIMER_MODE_HARD - Timer callback function will be executed in * hard irq context even on PREEMPT_RT. */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* * Return values for the callback function */ enum hrtimer_restart { HRTIMER_NORESTART, /* Timer is not restarted */ HRTIMER_RESTART, /* Timer must be restarted */ }; /* * Values to track state of the timer * * Possible states: * * 0x00 inactive * 0x01 enqueued into rbtree * * The callback state is not part of the timer->state because clearing it would * mean touching the timer after the callback, this makes it impossible to free * the timer from the callback function. * * Therefore we track the callback state in: * * timer->base->cpu_base->running == timer * * On SMP it is possible to have a "callback function running and enqueued" * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 /** * struct hrtimer - the basic hrtimer structure * @node: timerqueue node, which also manages node.expires, * the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. Is setup by adding * slack to the _softexpires value. For non range timers * identical to _softexpires. * @_softexpires: the absolute earliest expiry time of the hrtimer. * The time which was given as expiry time when the timer * was armed. * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative * @is_soft: Set if hrtimer will be expired in soft interrupt context. * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * * The hrtimer structure must be initialized by hrtimer_init() */ struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; u8 is_soft; u8 is_hard; }; /** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure * @task: task to wake up * * task is set to NULL, when the timer expires. */ struct hrtimer_sleeper { struct hrtimer timer; struct task_struct *task; }; #ifdef CONFIG_64BIT # define __hrtimer_clock_base_align ____cacheline_aligned #else # define __hrtimer_clock_base_align #endif /** * struct hrtimer_clock_base - the timer base for a specific clock * @cpu_base: per cpu clock base * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @seq: seqcount around __run_hrtimer * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; unsigned int index; clockid_t clockid; seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; } __hrtimer_clock_base_align; enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, HRTIMER_BASE_MONOTONIC_SOFT, HRTIMER_BASE_REALTIME_SOFT, HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; /** * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang * @softirq_activated: displays, if the softirq is raised - update of softirq * related settings is not required then. * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue; it is the total first expiry time (hard * and soft hrtimer are taken into account) * @next_timer: Pointer to the first expiring timer * @softirq_expires_next: Time to check, if soft queues needs also to be expired * @softirq_next_timer: Pointer to the first expiring softirq based timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). * Do not dereference the pointer because it is not reliable on * cross cpu removals. */ struct hrtimer_cpu_base { raw_spinlock_t lock; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, softirq_activated : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif #ifdef CONFIG_PREEMPT_RT spinlock_t softirq_expiry_lock; atomic_t timer_waiters; #endif ktime_t expires_next; struct hrtimer *next_timer; ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; timer->_softexpires = time; } static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, delta); } static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { timer->node.expires = tv64; timer->_softexpires = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = ktime_add_safe(timer->node.expires, time); timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->node.expires = ktime_add_ns(timer->node.expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { return timer->node.expires; } static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { return timer->node.expires; } static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { return ktime_to_ns(timer->node.expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { return ktime_sub(timer->node.expires, timer->base->get_time()); } static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? timer->base->cpu_base->hres_active : 0; } #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); extern unsigned int hrtimer_resolution; #else #define hrtimer_resolution (unsigned int)LOW_RES_NSEC #endif static inline ktime_t __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) { ktime_t rem = ktime_sub(timer->node.expires, now); /* * Adjust relative timers for the extra we added in * hrtimer_start_range_ns() to prevent short timeouts. */ if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel) rem -= hrtimer_resolution; return rem; } static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { return __hrtimer_expires_remaining_adjusted(timer, timer->base->get_time()); } #ifdef CONFIG_TIMERFD extern void timerfd_clock_was_set(void); #else static inline void timerfd_clock_was_set(void) { } #endif extern void hrtimers_resume(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); #ifdef CONFIG_PREEMPT_RT void hrtimer_cancel_wait_running(const struct hrtimer *timer); #else static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) { cpu_relax(); } #endif /* Exported timer functions: */ /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else static inline void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode) { hrtimer_init(timer, which_clock, mode); } static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { hrtimer_init_sleeper(sl, clock_id, mode); } static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { hrtimer_start_range_ns(timer, tim, 0, mode); } extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { u64 delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); hrtimer_start_range_ns(timer, soft, delta, mode); } void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode); static inline void hrtimer_restart(struct hrtimer *timer) { hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { return __hrtimer_get_remaining(timer, false); } extern u64 hrtimer_get_next_event(void); extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); /** * hrtimer_is_queued = check, whether the timer is on one of the queues * @timer: Timer to check * * Returns: True if the timer is queued, false otherwise * * The function can be used lockless, but it gives only a current snapshot. */ static inline bool hrtimer_is_queued(struct hrtimer *timer) { /* The READ_ONCE pairs with the update functions of timer->state */ return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* * Helper function to check, whether the timer is running the callback * function */ static inline int hrtimer_callback_running(struct hrtimer *timer) { return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** * hrtimer_forward_now - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * * Forward the timer expiry so it will expire after the current time * of the hrtimer clock base. Returns the number of overruns. * * Can be safely called from the callback function of @timer. If * called from other contexts @timer must neither be enqueued nor * running the callback and the caller needs to take care of * serialization. * * Note: This only updates the timer expiry value and does not requeue * the timer. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { return hrtimer_forward(timer, timer->base->get_time(), interval); } /* Precise sleep: */ extern int nanosleep_copyout(struct restart_block *, struct timespec64 *); extern long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid); extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); /* Show pending timers: */ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_dead_cpu(unsigned int cpu); #else #define hrtimers_dead_cpu NULL #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H /* * seqcount_t / seqlock_t - a reader-writer consistency mechanism with * lockless readers (read-only retry loops), and no writer starvation. * * See Documentation/locking/seqlock.rst * * Copyrights: * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH */ #include <linux/compiler.h> #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/ww_mutex.h> #include <linux/preempt.h> #include <linux/spinlock.h> #include <asm/processor.h> /* * The seqlock seqcount_t interface does not prescribe a precise sequence of * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of the seqlock_t interface * is not affected. */ #define KCSAN_SEQLOCK_REGION_MAX 1000 /* * Sequence counters (seqcount_t) * * This is the raw counting mechanism, without any writer protection. * * Write side critical sections must be serialized and non-preemptible. * * If readers can be invoked from hardirq or softirq contexts, * interrupts or bottom halves must also be respectively disabled before * entering the write section. * * This mechanism can't be used if the protected data contains pointers, * as the writer can invalidate a pointer that a reader is following. * * If the write serialization mechanism is one of the common kernel * locking primitives, use a sequence counter with associated lock * (seqcount_LOCKNAME_t) instead. * * If it's desired to automatically handle the sequence counter writer * serialization and non-preemptibility requirements, use a sequential * lock (seqlock_t) instead. * * See Documentation/locking/seqlock.rst */ typedef struct seqcount { unsigned sequence; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif } seqcount_t; static inline void __seqcount_init(seqcount_t *s, const char *name, struct lock_class_key *key) { /* * Make sure we are not reinitializing a held lock: */ lockdep_init_map(&s->dep_map, name, key, 0); s->sequence = 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } /** * seqcount_init() - runtime initializer for seqcount_t * @s: Pointer to the seqcount_t instance */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ __seqcount_init((s), #s, &__key); \ } while (0) static inline void seqcount_lockdep_reader_access(const seqcount_t *s) { seqcount_t *l = (seqcount_t *)s; unsigned long flags; local_irq_save(flags); seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); seqcount_release(&l->dep_map, _RET_IP_); local_irq_restore(flags); } #else # define SEQCOUNT_DEP_MAP_INIT(lockname) # define seqcount_init(s) __seqcount_init(s, NULL, NULL) # define seqcount_lockdep_reader_access(x) #endif /** * SEQCNT_ZERO() - static initializer for seqcount_t * @name: Name of the seqcount_t instance */ #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /* * Sequence counters with associated locks (seqcount_LOCKNAME_t) * * A sequence counter which associates the lock used for writer * serialization at initialization time. This enables lockdep to validate * that the write side critical section is properly serialized. * * For associated locks which do not implicitly disable preemption, * preemption protection is enforced in the write side function. * * Lockdep is never used in any for the raw write variants. * * See Documentation/locking/seqlock.rst */ /* * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot * disable preemption. It can lead to higher latencies, and the write side * sections will not be able to acquire locks which become sleeping locks * (e.g. spinlock_t). * * To remain preemptible while avoiding a possible livelock caused by the * reader preempting the writer, use a different technique: let the reader * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the * case, acquire then release the associated LOCKNAME writer serialization * lock. This will allow any possibly-preempted writer to make progress * until the end of its writer serialization lock critical section. * * This lock-unlock technique must be implemented for all of PREEMPT_RT * sleeping locks. See Documentation/locking/locktypes.rst */ #if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) #define __SEQ_LOCK(expr) expr #else #define __SEQ_LOCK(expr) #endif /* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock * * A plain sequence counter with external writer synchronization by * LOCKNAME @lock. The lock is associated to the sequence counter in the * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. * * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. */ /* * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t * @s: Pointer to the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated lock */ #define seqcount_LOCKNAME_init(s, _lock, lockname) \ do { \ seqcount_##lockname##_t *____s = (s); \ seqcount_init(&____s->seqcount); \ __SEQ_LOCK(____s->lock = (_lock)); \ } while (0) #define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock); #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex); #define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex); /* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t * * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype * @lockmember: argument for lockdep_assert_held() * @lockbase: associated lock release function (prefix only) * @lock_acquire: associated lock acquisition function (full call) */ #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ unsigned seq = READ_ONCE(s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ __SEQ_LOCK(lock_acquire); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. \ */ \ seq = READ_ONCE(s->seqcount.sequence); \ } \ \ return seq; \ } \ \ static __always_inline bool \ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return preemptible; \ \ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ return false; \ } \ \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } /* * __seqprop() for seqcount_t */ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) { return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) { return READ_ONCE(s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) { return false; } static inline void __seqprop_assert(const seqcount_t *s) { lockdep_assert_preemption_disabled(); } #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t * @name: Name of the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated LOCKNAME */ #define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ __SEQ_LOCK(.lock = (assoc_lock)) \ } #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) #define __seqprop(s, prop) _Generic(*(s), \ seqcount_t: __seqprop_##prop((void *)(s)), \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop), \ __seqprop_case((s), ww_mutex, prop)) #define __seqcount_ptr(s) __seqprop(s, ptr) #define __seqcount_sequence(s) __seqprop(s, sequence) #define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) #define __seqcount_assert_lock_held(s) __seqprop(s, assert) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is * provided before actually loading any of the variables that are to be * protected in this critical section. * * Use carefully, only in critical code, and comment how the barrier is * provided. * * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ ({ \ unsigned seq; \ \ while ((seq = __seqcount_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ seq; \ }) /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) \ ({ \ unsigned seq = __read_seqcount_begin(s); \ \ smp_rmb(); \ seq; \ }) /** * read_seqcount_begin() - begin a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ ({ \ seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ raw_read_seqcount_begin(s); \ }) /** * raw_read_seqcount() - read the raw seqcount_t counter value * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or * masking the sequence counter LSB. Calling code is responsible for * handling that. * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ ({ \ unsigned seq = __seqcount_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ seq; \ }) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait * for the count to stabilize. If a writer is active when it begins, it * will fail the read_seqcount_retry() at the end of the read critical * section instead of stabilizing at the beginning of it. * * Use this only in special kernel hot paths where the read section is * small and has a high probability of success through other external * means. It will save a single branching instruction. * * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ ({ \ /* \ * If the counter is odd, let read_seqcount_retry() fail \ * by decrementing the counter. \ */ \ raw_read_seqcount(s) & ~1; \ }) /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is * provided before actually loading any of the variables that are to be * protected in this critical section. * * Use carefully, only in critical code, and comment how the barrier is * provided. * * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ __read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); } /** * read_seqcount_retry() - end a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given * seqcount_t. If the critical section was invalid, it must be ignored * (and typically retried). * * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { smp_rmb(); return __read_seqcount_t_retry(s, start); } /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants */ #define raw_write_seqcount_begin(s) \ do { \ if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ raw_write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void raw_write_seqcount_t_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants */ #define raw_write_seqcount_end(s) \ do { \ raw_write_seqcount_t_end(__seqcount_ptr(s)); \ \ if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void raw_write_seqcount_t_end(seqcount_t *s) { smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst */ #define write_seqcount_begin_nested(s, subclass) \ do { \ __seqcount_assert_lock_held(s); \ \ if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \ } while (0) static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) { raw_write_seqcount_t_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } /** * write_seqcount_begin() - start a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * write_seqcount_begin opens a write side critical section of the given * seqcount_t. * * Context: seqcount_t write side critical sections must be serialized and * non-preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ #define write_seqcount_begin(s) \ do { \ __seqcount_assert_lock_held(s); \ \ if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void write_seqcount_t_begin(seqcount_t *s) { write_seqcount_t_begin_nested(s, 0); } /** * write_seqcount_end() - end a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * The write section must've been opened with write_seqcount_begin(). */ #define write_seqcount_end(s) \ do { \ write_seqcount_t_end(__seqcount_ptr(s)); \ \ if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void write_seqcount_t_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); raw_write_seqcount_t_end(s); } /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. It is one wmb cheaper, because it can collapse * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before and after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; * bool X = true, Y = false; * * void read(void) * { * bool x, y; * * do { * int s = read_seqcount_begin(&seq); * * x = X; y = Y; * * } while (read_seqcount_retry(&