1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * RNG: Random Number Generator algorithms under the crypto API * * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com> * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_RNG_H #define _CRYPTO_RNG_H #include <linux/crypto.h> struct crypto_rng; /** * struct rng_alg - random number generator definition * * @generate: The function defined by this variable obtains a * random number. The random number generator transform * must generate the random number out of the context * provided with this call, plus any additional data * if provided to the call. * @seed: Seed or reseed the random number generator. With the * invocation of this function call, the random number * generator shall become ready for generation. If the * random number generator requires a seed for setting * up a new state, the seed must be provided by the * consumer while invoking this function. The required * size of the seed is defined with @seedsize . * @set_ent: Set entropy that would otherwise be obtained from * entropy source. Internal use only. * @seedsize: The seed size required for a random number generator * initialization defined with this variable. Some * random number generators does not require a seed * as the seeding is implemented internally without * the need of support by the consumer. In this case, * the seed size is set to zero. * @base: Common crypto API algorithm data structure. */ struct rng_alg { int (*generate)(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen); int (*seed)(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); void (*set_ent)(struct crypto_rng *tfm, const u8 *data, unsigned int len); unsigned int seedsize; struct crypto_alg base; }; struct crypto_rng { struct crypto_tfm base; }; extern struct crypto_rng *crypto_default_rng; int crypto_get_default_rng(void); void crypto_put_default_rng(void); /** * DOC: Random number generator API * * The random number generator API is used with the ciphers of type * CRYPTO_ALG_TYPE_RNG (listed as type "rng" in /proc/crypto) */ /** * crypto_alloc_rng() -- allocate RNG handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * message digest cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for a random number generator. The returned struct * crypto_rng is the cipher handle that is required for any subsequent * API invocation for that random number generator. * * For all random number generators, this call creates a new private copy of * the random number generator that does not share a state with other * instances. The only exception is the "krng" random number generator which * is a kernel crypto API use case for the get_random_bytes() function of the * /dev/random driver. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm) { return &tfm->base; } /** * crypto_rng_alg - obtain name of RNG * @tfm: cipher handle * * Return the generic name (cra_name) of the initialized random number generator * * Return: generic name string */ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) { return container_of(crypto_rng_tfm(tfm)->__crt_alg, struct rng_alg, base); } /** * crypto_free_rng() - zeroize and free RNG handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_rng(struct crypto_rng *tfm) { crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } /** * crypto_rng_generate() - get random number * @tfm: cipher handle * @src: Input buffer holding additional data, may be NULL * @slen: Length of additional data * @dst: output buffer holding the random numbers * @dlen: length of the output buffer * * This function fills the caller-allocated buffer with random * numbers using the random number generator referenced by the * cipher handle. * * Return: 0 function was successful; < 0 if an error occurred */ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { struct crypto_alg *alg = tfm->base.__crt_alg; int ret; crypto_stats_get(alg); ret = crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); crypto_stats_rng_generate(alg, dlen, ret); return ret; } /** * crypto_rng_get_bytes() - get random number * @tfm: cipher handle * @rdata: output buffer holding the random numbers * @dlen: length of the output buffer * * This function fills the caller-allocated buffer with random numbers using the * random number generator referenced by the cipher handle. * * Return: 0 function was successful; < 0 if an error occurred */ static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen) { return crypto_rng_generate(tfm, NULL, 0, rdata, dlen); } /** * crypto_rng_reset() - re-initialize the RNG * @tfm: cipher handle * @seed: seed input data * @slen: length of the seed input data * * The reset function completely re-initializes the random number generator * referenced by the cipher handle by clearing the current state. The new state * is initialized with the caller provided seed or automatically, depending * on the random number generator type (the ANSI X9.31 RNG requires * caller-provided seed, the SP800-90A DRBGs perform an automatic seeding). * The seed is provided as a parameter to this function call. The provided seed * should have the length of the seed size defined for the random number * generator as defined by crypto_rng_seedsize. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); /** * crypto_rng_seedsize() - obtain seed size of RNG * @tfm: cipher handle * * The function returns the seed size for the random number generator * referenced by the cipher handle. This value may be zero if the random * number generator does not implement or require a reseeding. For example, * the SP800-90A DRBGs implement an automated reseeding after reaching a * pre-defined threshold. * * Return: seed size for the random number generator */ static inline int crypto_rng_seedsize(struct crypto_rng *tfm) { return crypto_rng_alg(tfm)->seedsize; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM xdp #if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_XDP_H #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/tracepoint.h> #include <linux/bpf.h> #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ FN(TX) \ FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); #define __XDP_ACT_SYM_FN(x) \ { XDP_##x, #x }, #define __XDP_ACT_SYM_TAB \ __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL } __XDP_ACT_MAP(__XDP_ACT_TP_FN) TRACE_EVENT(xdp_exception, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, u32 act), TP_ARGS(dev, xdp, act), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) ), TP_fast_assign( __entry->prog_id = xdp->aux->id; __entry->act = act; __entry->ifindex = dev->ifindex; ), TP_printk("prog_id=%d action=%s ifindex=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex) ); TRACE_EVENT(xdp_bulk_tx, TP_PROTO(const struct net_device *dev, int sent, int drops, int err), TP_ARGS(dev, sent, drops, err), TP_STRUCT__entry( __field(int, ifindex) __field(u32, act) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->ifindex = dev->ifindex; __entry->act = XDP_TX; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", __entry->ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); #ifndef __DEVMAP_OBJ_TYPE #define __DEVMAP_OBJ_TYPE struct _bpf_dtab_netdev { struct net_device *dev; }; #endif /* __DEVMAP_OBJ_TYPE */ #define devmap_ifindex(tgt, map) \ (((map->map_type == BPF_MAP_TYPE_DEVMAP || \ map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \ ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0) DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, const struct bpf_map *map, u32 index), TP_ARGS(dev, xdp, tgt, err, map, index), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) __field(int, err) __field(int, to_ifindex) __field(u32, map_id) __field(int, map_index) ), TP_fast_assign( __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; __entry->to_ifindex = map ? devmap_ifindex(tgt, map) : index; __entry->map_id = map ? map->id : 0; __entry->map_index = map ? index : 0; ), TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" " map_id=%d map_index=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, __entry->err, __entry->map_id, __entry->map_index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, const struct bpf_map *map, u32 index), TP_ARGS(dev, xdp, tgt, err, map, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, const struct bpf_map *map, u32 index), TP_ARGS(dev, xdp, tgt, err, map, index) ); #define _trace_xdp_redirect(dev, xdp, to) \ trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to); #define _trace_xdp_redirect_err(dev, xdp, to, err) \ trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to); #define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ trace_xdp_redirect(dev, xdp, to, 0, map, index); #define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ trace_xdp_redirect_err(dev, xdp, to, err, map, index); /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, const struct bpf_map *map, u32 index), TP_ARGS(dev, xdp, tgt, err, map, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, const struct bpf_map *map, u32 index), TP_ARGS(dev, xdp, tgt, err, map, index) ); TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats), TP_ARGS(map_id, processed, drops, sched, xdp_stats), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, sched) __field(unsigned int, xdp_pass) __field(unsigned int, xdp_drop) __field(unsigned int, xdp_redirect) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->sched = sched; __entry->xdp_pass = xdp_stats->pass; __entry->xdp_drop = xdp_stats->drop; __entry->xdp_redirect = xdp_stats->redirect; ), TP_printk("kthread" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " sched=%d" " xdp_pass=%u xdp_drop=%u xdp_redirect=%u", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->sched, __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) ); TRACE_EVENT(xdp_cpumap_enqueue, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int to_cpu), TP_ARGS(map_id, processed, drops, to_cpu), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, to_cpu) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->to_cpu = to_cpu; ), TP_printk("enqueue" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " to_cpu=%d", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->to_cpu) ); TRACE_EVENT(xdp_devmap_xmit, TP_PROTO(const struct net_device *from_dev, const struct net_device *to_dev, int sent, int drops, int err), TP_ARGS(from_dev, to_dev, sent, drops, err), TP_STRUCT__entry( __field(int, from_ifindex) __field(u32, act) __field(int, to_ifindex) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->from_ifindex = from_dev->ifindex; __entry->act = XDP_REDIRECT; __entry->to_ifindex = to_dev->ifindex; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ndo_xdp_xmit" " from_ifindex=%d to_ifindex=%d action=%s" " sent=%d drops=%d" " err=%d", __entry->from_ifindex, __entry->to_ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); /* Expect users already include <net/xdp.h>, but not xdp_priv.h */ #include <net/xdp_priv.h> #define __MEM_TYPE_MAP(FN) \ FN(PAGE_SHARED) \ FN(PAGE_ORDER0) \ FN(PAGE_POOL) \ FN(XSK_BUFF_POOL) #define __MEM_TYPE_TP_FN(x) \ TRACE_DEFINE_ENUM(MEM_TYPE_##x); #define __MEM_TYPE_SYM_FN(x) \ { MEM_TYPE_##x, #x }, #define __MEM_TYPE_SYM_TAB \ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } __MEM_TYPE_MAP(__MEM_TYPE_TP_FN) TRACE_EVENT(mem_disconnect, TP_PROTO(const struct xdp_mem_allocator *xa), TP_ARGS(xa), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; ), TP_printk("mem_id=%d mem_type=%s allocator=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator ) ); TRACE_EVENT(mem_connect, TP_PROTO(const struct xdp_mem_allocator *xa, const struct xdp_rxq_info *rxq), TP_ARGS(xa, rxq), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) __field(const struct xdp_rxq_info *, rxq) __field(int, ifindex) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; __entry->rxq = rxq; __entry->ifindex = rxq->dev->ifindex; ), TP_printk("mem_id=%d mem_type=%s allocator=%p" " ifindex=%d", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator, __entry->ifindex ) ); TRACE_EVENT(mem_return_failed, TP_PROTO(const struct xdp_mem_info *mem, const struct page *page), TP_ARGS(mem, page), TP_STRUCT__entry( __field(const struct page *, page) __field(u32, mem_id) __field(u32, mem_type) ), TP_fast_assign( __entry->page = page; __entry->mem_id = mem->id; __entry->mem_type = mem->type; ), TP_printk("mem_id=%d mem_type=%s page=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->page ) ); #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CHECKSUM_64_H #define _ASM_X86_CHECKSUM_64_H /* * Checksums for x86-64 * Copyright 2002 by Andi Kleen, SuSE Labs * with some code from asm-x86/checksum.h */ #include <linux/compiler.h> #include <linux/uaccess.h> #include <asm/byteorder.h> /** * csum_fold - Fold and invert a 32bit checksum. * sum: 32bit unfolded sum * * Fold a 32bit running checksum to 16bit and invert it. This is usually * the last step before putting a checksum into a packet. * Make sure not to mix with 64bit checksums. */ static inline __sum16 csum_fold(__wsum sum) { asm(" addl %1,%0\n" " adcl $0xffff,%0" : "=r" (sum) : "r" ((__force u32)sum << 16), "0" ((__force u32)sum & 0xffff0000)); return (__force __sum16)(~(__force u32)sum >> 16); } /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by * Arnt Gulbrandsen. */ /** * ip_fast_csum - Compute the IPv4 header checksum efficiently. * iph: ipv4 header * ihl: length of header / 4 */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { unsigned int sum; asm(" movl (%1), %0\n" " subl $4, %2\n" " jbe 2f\n" " addl 4(%1), %0\n" " adcl 8(%1), %0\n" " adcl 12(%1), %0\n" "1: adcl 16(%1), %0\n" " lea 4(%1), %1\n" " decl %2\n" " jne 1b\n" " adcl $0, %0\n" " movl %0, %2\n" " shrl $16, %0\n" " addw %w2, %w0\n" " adcl $0, %0\n" " notl %0\n" "2:" /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=r" (sum), "=r" (iph), "=r" (ihl) : "1" (iph), "2" (ihl) : "memory"); return (__force __sum16)sum; } /** * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: ip protocol of packet * @sum: initial sum to be added in (32bit unfolded) * * Returns the pseudo header checksum the input data. Result is * 32bit unfolded. */ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { asm(" addl %1, %0\n" " adcl %2, %0\n" " adcl %3, %0\n" " adcl $0, %0\n" : "=r" (sum) : "g" (daddr), "g" (saddr), "g" ((len + proto)<<8), "0" (sum)); return sum; } /** * csum_tcpup_magic - Compute an IPv4 pseudo header checksum. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: ip protocol of packet * @sum: initial sum to be added in (32bit unfolded) * * Returns the 16bit pseudo header checksum the input data already * complemented and ready to be filled in. */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } /** * csum_partial - Compute an internet checksum. * @buff: buffer to be checksummed * @len: length of buffer. * @sum: initial sum to be added in (32bit unfolded) * * Returns the 32bit unfolded internet checksum of the buffer. * Before filling it in it needs to be csum_fold()'ed. * buff should be aligned to a 64bit boundary if possible. */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); /* Do not call this directly. Use the wrappers below */ extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len); extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); /** * ip_compute_csum - Compute an 16bit IP checksum. * @buff: buffer address. * @len: length of buffer. * * Returns the 16bit folded/inverted checksum of the passed buffer. * Ready to fill in. */ extern __sum16 ip_compute_csum(const void *buff, int len); /** * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. * @saddr: source address * @daddr: destination address * @len: length of packet * @proto: protocol of packet * @sum: initial sum (32bit unfolded) to be added in * * Computes an IPv6 pseudo header checksum. This sum is added the checksum * into UDP/TCP packets and contains some link layer information. * Returns the unfolded 32bit checksum. */ struct in6_addr; #define _HAVE_ARCH_IPV6_CSUM 1 extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum sum); static inline unsigned add32_with_carry(unsigned a, unsigned b) { asm("addl %2,%0\n\t" "adcl $0,%0" : "=r" (a) : "0" (a), "rm" (b)); return a; } #define HAVE_ARCH_CSUM_ADD static inline __wsum csum_add(__wsum csum, __wsum addend) { return (__force __wsum)add32_with_carry((__force unsigned)csum, (__force unsigned)addend); } #endif /* _ASM_X86_CHECKSUM_64_H */
1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 // SPDX-License-Identifier: GPL-2.0 /* * trace event based perf event profiling/tracing * * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> */ #include <linux/module.h> #include <linux/kprobes.h> #include <linux/security.h> #include "trace.h" #include "trace_probe.h" static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses * suprises */ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) perf_trace_t; /* Count the events in use (per event id, not per instance) */ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { int ret; if (tp_event->perf_perm) { ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } /* * We checked and allowed to create parent, * allow children without checking. */ if (p_event->parent) return 0; /* * It's ok to check current process (owner) permissions in here, * because code below is called only via perf_event_open syscall. */ /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { ret = perf_allow_tracepoint(&p_event->attr); if (ret) return ret; if (!is_sampling_event(p_event)) return 0; /* * We don't allow user space callchains for function trace * event, due to issues with page faults while tracing page * fault handler and its overall trickiness nature. */ if (!p_event->attr.exclude_callchain_user) return -EINVAL; /* * Same reason to disable user stack dump as for user space * callchains above. */ if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) return -EINVAL; } /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) return 0; /* Some events are ok to be traced by non-root users... */ if (p_event->attach_state == PERF_ATTACH_TASK) { if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) return 0; } /* * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ ret = perf_allow_tracepoint(&p_event->attr); if (ret) return ret; return 0; } static int perf_trace_event_reg(struct trace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; list = alloc_percpu(struct hlist_head); if (!list) goto fail; for_each_possible_cpu(cpu) INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); tp_event->perf_events = list; if (!total_ref_count) { char __percpu *buf; int i; for (i = 0; i < PERF_NR_CONTEXTS; i++) { buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; perf_trace_buf[i] = buf; } } ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL); if (ret) goto fail; total_ref_count++; return 0; fail: if (!total_ref_count) { int i; for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } } if (!--tp_event->perf_refcount) { free_percpu(tp_event->perf_events); tp_event->perf_events = NULL; } return ret; } static void perf_trace_event_unreg(struct perf_event *p_event) { struct trace_event_call *tp_event = p_event->tp_event; int i; if (--tp_event->perf_refcount > 0) goto out; tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL); /* * Ensure our callback won't be called anymore. The buffers * will be freed after that. */ tracepoint_synchronize_unregister(); free_percpu(tp_event->perf_events); tp_event->perf_events = NULL; if (!--total_ref_count) { for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } } out: module_put(tp_event->mod); } static int perf_trace_event_open(struct perf_event *p_event) { struct trace_event_call *tp_event = p_event->tp_event; return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); } static void perf_trace_event_close(struct perf_event *p_event) { struct trace_event_call *tp_event = p_event->tp_event; tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); } static int perf_trace_event_init(struct trace_event_call *tp_event, struct perf_event *p_event) { int ret; ret = perf_trace_event_perm(tp_event, p_event); if (ret) return ret; ret = perf_trace_event_reg(tp_event, p_event); if (ret) return ret; ret = perf_trace_event_open(p_event); if (ret) { perf_trace_event_unreg(p_event); return ret; } return 0; } int perf_trace_init(struct perf_event *p_event) { struct trace_event_call *tp_event; u64 event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); list_for_each_entry(tp_event, &ftrace_events, list) { if (tp_event->event.type == event_id && tp_event->class && tp_event->class->reg && try_module_get(tp_event->mod)) { ret = perf_trace_event_init(tp_event, p_event); if (ret) module_put(tp_event->mod); break; } } mutex_unlock(&event_mutex); return ret; } void perf_trace_destroy(struct perf_event *p_event) { mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); mutex_unlock(&event_mutex); } #ifdef CONFIG_KPROBE_EVENTS int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) { int ret; char *func = NULL; struct trace_event_call *tp_event; if (p_event->attr.kprobe_func) { func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL); if (!func) return -ENOMEM; ret = strncpy_from_user( func, u64_to_user_ptr(p_event->attr.kprobe_func), KSYM_NAME_LEN); if (ret == KSYM_NAME_LEN) ret = -E2BIG; if (ret < 0) goto out; if (func[0] == '\0') { kfree(func); func = NULL; } } tp_event = create_local_trace_kprobe( func, (void *)(unsigned long)(p_event->attr.kprobe_addr), p_event->attr.probe_offset, is_retprobe); if (IS_ERR(tp_event)) { ret = PTR_ERR(tp_event); goto out; } mutex_lock(&event_mutex); ret = perf_trace_event_init(tp_event, p_event); if (ret) destroy_local_trace_kprobe(tp_event); mutex_unlock(&event_mutex); out: kfree(func); return ret; } void perf_kprobe_destroy(struct perf_event *p_event) { mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); mutex_unlock(&event_mutex); destroy_local_trace_kprobe(p_event->tp_event); } #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS int perf_uprobe_init(struct perf_event *p_event, unsigned long ref_ctr_offset, bool is_retprobe) { int ret; char *path = NULL; struct trace_event_call *tp_event; if (!p_event->attr.uprobe_path) return -EINVAL; path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX); if (IS_ERR(path)) { ret = PTR_ERR(path); return (ret == -EINVAL) ? -E2BIG : ret; } if (path[0] == '\0') { ret = -EINVAL; goto out; } tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset, ref_ctr_offset, is_retprobe); if (IS_ERR(tp_event)) { ret = PTR_ERR(tp_event); goto out; } /* * local trace_uprobe need to hold event_mutex to call * uprobe_buffer_enable() and uprobe_buffer_disable(). * event_mutex is not required for local trace_kprobes. */ mutex_lock(&event_mutex); ret = perf_trace_event_init(tp_event, p_event); if (ret) destroy_local_trace_uprobe(tp_event); mutex_unlock(&event_mutex); out: kfree(path); return ret; } void perf_uprobe_destroy(struct perf_event *p_event) { mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); mutex_unlock(&event_mutex); destroy_local_trace_uprobe(p_event->tp_event); } #endif /* CONFIG_UPROBE_EVENTS */ int perf_trace_add(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; if (!(flags & PERF_EF_START)) p_event->hw.state = PERF_HES_STOPPED; /* * If TRACE_REG_PERF_ADD returns false; no custom action was performed * and we need to take the default action of enqueueing our event on * the right per-cpu hlist. */ if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) { struct hlist_head __percpu *pcpu_list; struct hlist_head *list; pcpu_list = tp_event->perf_events; if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); } return 0; } void perf_trace_del(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; /* * If TRACE_REG_PERF_DEL returns false; no custom action was performed * and we need to take the default action of dequeueing our event from * the right per-cpu hlist. */ if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event)) hlist_del_rcu(&p_event->hlist_entry); } void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) { char *raw_data; int rctx; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "perf buffer not large enough")) return NULL; *rctxp = rctx = perf_swevent_get_recursion_context(); if (rctx < 0) return NULL; if (regs) *regs = this_cpu_ptr(&__perf_regs[rctx]); raw_data = this_cpu_ptr(perf_trace_buf[rctx]); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_alloc); NOKPROBE_SYMBOL(perf_trace_buf_alloc); void perf_trace_buf_update(void *record, u16 type) { struct trace_entry *entry = record; int pc = preempt_count(); unsigned long flags; local_save_flags(flags); tracing_generic_entry_update(entry, type, flags, pc); } NOKPROBE_SYMBOL(perf_trace_buf_update); #ifdef CONFIG_FUNCTION_TRACER static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; struct perf_event *event; struct hlist_head head; struct pt_regs regs; int rctx; if ((unsigned long)ops->private != smp_processor_id()) return; event = container_of(ops, struct perf_event, ftrace_ops); /* * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all * the perf code does is hlist_for_each_entry_rcu(), so we can * get away with simply setting the @head.first pointer in order * to create a singular list. */ head.first = &event->hlist_entry; #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); memset(&regs, 0, sizeof(regs)); perf_fetch_caller_regs(&regs); entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx); if (!entry) return; entry->ip = ip; entry->parent_ip = parent_ip; perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, 1, &regs, &head, NULL); #undef ENTRY_SIZE } static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; ops->flags = FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; ops->private = (void *)(unsigned long)nr_cpu_ids; return register_ftrace_function(ops); } static int perf_ftrace_function_unregister(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; int ret = unregister_ftrace_function(ops); ftrace_free_filter(ops); return ret; } int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { struct perf_event *event = data; switch (type) { case TRACE_REG_REGISTER: case TRACE_REG_UNREGISTER: break; case TRACE_REG_PERF_REGISTER: case TRACE_REG_PERF_UNREGISTER: return 0; case TRACE_REG_PERF_OPEN: return perf_ftrace_function_register(data); case TRACE_REG_PERF_CLOSE: return perf_ftrace_function_unregister(data); case TRACE_REG_PERF_ADD: event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id(); return 1; case TRACE_REG_PERF_DEL: event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids; return 1; } return -EINVAL; } #endif /* CONFIG_FUNCTION_TRACER */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNETLINK_H #define __NET_RTNETLINK_H #include <linux/rtnetlink.h> #include <net/netlink.h> typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family; else return AF_UNSPEC; } /** * struct rtnl_link_ops - rtnetlink link operations * * @list: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters * @priv_size: sizeof net_device private space * @setup: net_device setup function * @newlink: Function for configuring and registering a new device * @changelink: Function for changing parameters of an existing device * @dellink: Function to remove a device * @get_size: Function to calculate required room for dumping device * specific netlink attributes * @fill_info: Function to dump device specific netlink attributes * @get_xstats_size: Function to calculate required room for dumping device * specific statistics * @fill_xstats: Function to dump device specific statistics * @get_num_tx_queues: Function to determine number of transmit queues * to create when creating a new device. * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. * @get_link_net: Function to get the i/o netns of the device * @get_linkxstats_size: Function to calculate the required room for * dumping device-specific extended link stats * @fill_linkxstats: Function to dump device-specific extended link stats */ struct rtnl_link_ops { struct list_head list; const char *kind; size_t priv_size; void (*setup)(struct net_device *dev); bool netns_refund; unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); int (*newlink)(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); int (*changelink)(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); void (*dellink)(struct net_device *dev, struct list_head *head); size_t (*get_size)(const struct net_device *dev); int (*fill_info)(struct sk_buff *skb, const struct net_device *dev); size_t (*get_xstats_size)(const struct net_device *dev); int (*fill_xstats)(struct sk_buff *skb, const struct net_device *dev); unsigned int (*get_num_tx_queues)(void); unsigned int (*get_num_rx_queues)(void); unsigned int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); size_t (*get_slave_size)(const struct net_device *dev, const struct net_device *slave_dev); int (*fill_slave_info)(struct sk_buff *skb, const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); size_t (*get_linkxstats_size)(const struct net_device *dev, int attr); int (*fill_linkxstats)(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); void __rtnl_link_unregister(struct rtnl_link_ops *ops); int rtnl_link_register(struct rtnl_link_ops *ops); void rtnl_link_unregister(struct rtnl_link_ops *ops); /** * struct rtnl_af_ops - rtnetlink address family operations * * @list: Used internally * @family: Address family * @fill_link_af: Function to fill IFLA_AF_SPEC with address family * specific netlink attributes. * @get_link_af_size: Function to calculate size of address family specific * netlink attributes. * @validate_link_af: Validate a IFLA_AF_SPEC attribute, must check attr * for invalid configuration settings. * @set_link_af: Function to parse a IFLA_AF_SPEC attribute and modify * net_device accordingly. */ struct rtnl_af_ops { struct list_head list; int family; int (*fill_link_af)(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask); size_t (*get_link_af_size)(const struct net_device *dev, u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, const struct nlattr *attr); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr); int (*fill_stats_af)(struct sk_buff *skb, const struct net_device *dev); size_t (*get_stats_af_size)(const struct net_device *dev); }; void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[], struct netlink_ext_ack *extack); int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1, INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3, }; extern int sysctl_tunnel_ecn_log; static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; } static inline int INET_ECN_is_not_ect(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; } static inline int INET_ECN_is_capable(__u8 dsfield) { return dsfield & INET_ECN_ECT_0; } /* * RFC 3168 9.1.1 * The full-functionality option for ECN encapsulation is to copy the * ECN codepoint of the inside header to the outside header on * encapsulation if the inside header is not-ECT or ECT, and to set the * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of * the inside header is CE. */ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) { outer &= ~INET_ECN_MASK; outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : INET_ECN_ECT_0; return outer; } static inline void INET_ECN_xmit(struct sock *sk) { inet_sk(sk)->tos |= INET_ECN_ECT_0; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } static inline void INET_ECN_dontxmit(struct sock *sk) { inet_sk(sk)->tos &= ~INET_ECN_MASK; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass &= ~INET_ECN_MASK; } #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ } while (0) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 check = (__force u32)iph->check; u32 ecn = (iph->tos + 1) & INET_ECN_MASK; /* * After the last operation we have (in binary): * INET_ECN_NOT_ECT => 01 * INET_ECN_ECT_1 => 10 * INET_ECN_ECT_0 => 11 * INET_ECN_CE => 00 */ if (!(ecn & 2)) return !ecn; /* * The following gives us: * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { u32 check = (__force u32)iph->check; if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; return 1; } static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; } static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) { dscp &= ~INET_ECN_MASK; ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); } struct ipv6hdr; /* Note: * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE * In IPv6 case, no checksum compensates the change in IPv6 header, * so we have to update skb->csum. */ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; from = *(__be32 *)iph; to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; from = *(__be32 *)iph; to = from ^ htonl(INET_ECN_MASK << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ce(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } return 0; } static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ect1(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); break; } return 0; } /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer * header (column) in Figure 4 * * +---------+------------------------------------------------+ * |Arriving | Arriving Outer Header | * | Inner +---------+------------+------------+------------+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE | * +---------+---------+------------+------------+------------+ * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | * | CE | CE | CE | CE(!!!)| CE | * +---------+---------+------------+------------+------------+ * * Figure 4: New IP in IP Decapsulation Behaviour * * returns 0 on success * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { case INET_ECN_NOT_ECT: return 0; case INET_ECN_ECT_0: case INET_ECN_ECT_1: return 1; case INET_ECN_CE: return 2; } } *set_ce = INET_ECN_is_ce(outer); return 0; } static inline int INET_ECN_decapsulate(struct sk_buff *skb, __u8 outer, __u8 inner) { bool set_ce = false; int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); if (!rc) { if (set_ce) INET_ECN_set_ce(skb); else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) INET_ECN_set_ect1(skb); } return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, oiph->tos, inner); } static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TASK_WORK_H #define _LINUX_TASK_WORK_H #include <linux/list.h> #include <linux/sched.h> typedef void (*task_work_func_t)(struct callback_head *); static inline void init_task_work(struct callback_head *twork, task_work_func_t func) { twork->func = func; } enum task_work_notify_mode { TWA_NONE, TWA_RESUME, TWA_SIGNAL, }; int task_work_add(struct task_struct *task, struct callback_head *twork, enum task_work_notify_mode mode); struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); static inline void exit_task_work(struct task_struct *task) { task_work_run(); } #endif /* _LINUX_TASK_WORK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct ptr_ring' datastructure. * * Author: * Michael S. Tsirkin <mst@redhat.com> * * Copyright (C) 2016 Red Hat, Inc. * * This is a limited-size FIFO maintaining pointers in FIFO order, with * one CPU producing entries and another consuming entries from a FIFO. * * This implementation tries to minimize cache-contention when there is a * single producer and a single consumer CPU. */ #ifndef _LINUX_PTR_RING_H #define _LINUX_PTR_RING_H 1 #ifdef __KERNEL__ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/slab.h> #include <linux/mm.h> #include <asm/errno.h> #endif struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ int batch; /* number of entries to consume in a batch */ void **queue; }; /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). * * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { return r->queue[r->producer]; } static inline bool ptr_ring_full(struct ptr_ring *r) { bool ret; spin_lock(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock(&r->producer_lock); return ret; } static inline bool ptr_ring_full_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_irq(&r->producer_lock); return ret; } static inline bool ptr_ring_full_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_full(r); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline bool ptr_ring_full_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_bh(&r->producer_lock); return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. * Callers are responsible for making sure pointer that is being queued * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ /* Pairs with the dependency ordering in __ptr_ring_consume. */ smp_wmb(); WRITE_ONCE(r->queue[r->producer++], ptr); if (unlikely(r->producer >= r->size)) r->producer = 0; return 0; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * consume in interrupt or BH context, you must disable interrupts/BH when * calling this. */ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) { int ret; spin_lock(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock(&r->producer_lock); return ret; } static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) { int ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_irq(&r->producer_lock); return ret; } static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) { unsigned long flags; int ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_produce(r, ptr); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) { int ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_bh(&r->producer_lock); return ret; } static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) return READ_ONCE(r->queue[r->consumer_head]); return NULL; } /* * Test ring empty status without taking any locks. * * NB: This is only safe to call if ring is never resized. * * However, if some other CPU consumes ring entries at the same time, the value * returned is not guaranteed to be correct. * * In this case - to avoid incorrectly detecting the ring * as empty - the CPU consuming the ring entries is responsible * for either consuming all ring entries until the ring is empty, * or synchronizing with some other CPU and causing it to * re-test __ptr_ring_empty and/or consume the ring enteries * after the synchronization point. * * Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { if (likely(r->size)) return !r->queue[READ_ONCE(r->consumer_head)]; return true; } static inline bool ptr_ring_empty(struct ptr_ring *r) { bool ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_irq(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_empty(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline bool ptr_ring_empty_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_bh(&r->consumer_lock); return ret; } /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. * Doing it naively at each consume would be as simple as: * consumer = r->consumer; * r->queue[consumer++] = NULL; * if (unlikely(consumer >= r->size)) * consumer = 0; * r->consumer = consumer; * but that is suboptimal when the ring is full as producer is writing * out new entries in the same cache line. Defer these updates until a * batch of entries has been consumed. */ /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty * to work correctly. */ int consumer_head = r->consumer_head; int head = consumer_head++; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. * We also do this when we reach end of the ring - not mandatory * but helps keep the implementation simple. */ if (unlikely(consumer_head - r->consumer_tail >= r->batch || consumer_head >= r->size)) { /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines * besides the first one until we write out all entries. */ while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; r->consumer_tail = consumer_head; } if (unlikely(consumer_head >= r->size)) { consumer_head = 0; r->consumer_tail = 0; } /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, consumer_head); } static inline void *__ptr_ring_consume(struct ptr_ring *r) { void *ptr; /* The READ_ONCE in __ptr_ring_peek guarantees that anyone * accessing data through the pointer is up to date. Pairs * with smp_wmb in __ptr_ring_produce. */ ptr = __ptr_ring_peek(r); if (ptr) __ptr_ring_discard_one(r); return ptr; } static inline int __ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { void *ptr; int i; for (i = 0; i < n; i++) { ptr = __ptr_ring_consume(r); if (!ptr) break; array[i] = ptr; } return i; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when * producing. */ static inline void *ptr_ring_consume(struct ptr_ring *r) { void *ptr; spin_lock(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_irq(struct ptr_ring *r) { void *ptr; spin_lock_irq(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_irq(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_any(struct ptr_ring *r) { unsigned long flags; void *ptr; spin_lock_irqsave(&r->consumer_lock, flags); ptr = __ptr_ring_consume(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ptr; } static inline void *ptr_ring_consume_bh(struct ptr_ring *r) { void *ptr; spin_lock_bh(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_bh(&r->consumer_lock); return ptr; } static inline int ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { int ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irq(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, void **array, int n) { unsigned long flags; int ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_bh(&r->consumer_lock); return ret; } /* Cast to structure type and call a function without discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. */ #define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) #define PTR_RING_PEEK_CALL(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_BH(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ unsigned long __PTR_RING_PEEK_CALL_f;\ \ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v; \ }) /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See * documentation for vmalloc for which of them are legal. */ static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) { r->size = size; r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); /* We need to set batch at least to 1 to make logic * in __ptr_ring_discard_one work correctly. * Batching too much (because ring is small) would cause a lot of * burstiness. Needs tuning, for now disable batching. */ if (r->batch > r->size / 2 || !r->batch) r->batch = 1; } static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; __ptr_ring_set_size(r, size); r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); return 0; } /* * Return entries into ring. Destroy entries that don't fit. * * Note: this is expected to be a rare slow path operation. * * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, void (*destroy)(void *)) { unsigned long flags; int head; spin_lock_irqsave(&r->consumer_lock, flags); spin_lock(&r->producer_lock); if (!r->size) goto done; /* * Clean out buffered entries (for simplicity). This way following code * can test entries for NULL and if not assume they are valid. */ head = r->consumer_head - 1; while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; r->consumer_tail = r->consumer_head; /* * Go over entries in batch, start moving head back and copy entries. * Stop when we run into previously unconsumed entries. */ while (n) { head = r->consumer_head - 1; if (head < 0) head = r->size - 1; if (r->queue[head]) { /* This batch entry will have to be destroyed. */ goto done; } r->queue[head] = batch[--n]; r->consumer_tail = head; /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, head); } done: /* Destroy all entries left in the batch. */ while (n) destroy(batch[--n]); spin_unlock(&r->producer_lock); spin_unlock_irqrestore(&r->consumer_lock, flags); } static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) { int producer = 0; void **old; void *ptr; while ((ptr = __ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; else if (destroy) destroy(ptr); if (producer >= size) producer = 0; __ptr_ring_set_size(r, size); r->producer = producer; r->consumer_head = 0; r->consumer_tail = 0; old = r->queue; r->queue = queue; return old; } /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void **queue = __ptr_ring_init_queue_alloc(size, gfp); void **old; if (!queue) return -ENOMEM; spin_lock_irqsave(&(r)->consumer_lock, flags); spin_lock(&(r)->producer_lock); old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); spin_unlock(&(r)->producer_lock); spin_unlock_irqrestore(&(r)->consumer_lock, flags); kvfree(old); return 0; } /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void ***queues; int i; queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { queues[i] = __ptr_ring_init_queue_alloc(size, gfp); if (!queues[i]) goto nomem; } for (i = 0; i < nrings; ++i) { spin_lock_irqsave(&(rings[i])->consumer_lock, flags); spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); spin_unlock(&(rings[i])->producer_lock); spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); } for (i = 0; i < nrings; ++i) kvfree(queues[i]); kfree(queues); return 0; nomem: while (--i >= 0) kvfree(queues[i]); kfree(queues); noqueues: return -ENOMEM; } static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; if (destroy) while ((ptr = ptr_ring_consume(r))) destroy(ptr); kvfree(r->queue); } #endif /* _LINUX_PTR_RING_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 /* SPDX-License-Identifier: GPL-2.0 */ /* * ioport.h Definitions of routines for detecting, reserving and * allocating system resources. * * Authors: Linus Torvalds */ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/types.h> #include <linux/bits.h> /* * Resources are tree-like, allowing * nesting etc.. */ struct resource { resource_size_t start; resource_size_t end; const char *name; unsigned long flags; unsigned long desc; struct resource *parent, *sibling, *child; }; /* * IO resources have these defined flags. * * PCI devices expose these flags to userspace in the "resource" sysfs file, * so don't move them. */ #define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ #define IORESOURCE_TYPE_BITS 0x00001f00 /* Resource type */ #define IORESOURCE_IO 0x00000100 /* PCI/ISA I/O ports */ #define IORESOURCE_MEM 0x00000200 #define IORESOURCE_REG 0x00000300 /* Register offsets */ #define IORESOURCE_IRQ 0x00000400 #define IORESOURCE_DMA 0x00000800 #define IORESOURCE_BUS 0x00001000 #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ #define IORESOURCE_READONLY 0x00004000 #define IORESOURCE_CACHEABLE 0x00008000 #define IORESOURCE_RANGELENGTH 0x00010000 #define IORESOURCE_SHADOWABLE 0x00020000 #define IORESOURCE_SIZEALIGN 0x00040000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00080000 /* start field is alignment */ #define IORESOURCE_MEM_64 0x00100000 #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ #define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ #define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ /* IORESOURCE_SYSRAM specific bits. */ #define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ #define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ /* I/O resource extended types */ #define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) #define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) #define IORESOURCE_DMA_8BIT (0<<0) #define IORESOURCE_DMA_8AND16BIT (1<<0) #define IORESOURCE_DMA_16BIT (2<<0) #define IORESOURCE_DMA_MASTER (1<<2) #define IORESOURCE_DMA_BYTE (1<<3) #define IORESOURCE_DMA_WORD (1<<4) #define IORESOURCE_DMA_SPEED_MASK (3<<6) #define IORESOURCE_DMA_COMPATIBLE (0<<6) #define IORESOURCE_DMA_TYPEA (1<<6) #define IORESOURCE_DMA_TYPEB (2<<6) #define IORESOURCE_DMA_TYPEF (3<<6) /* PnP memory I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */ #define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */ #define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */ #define IORESOURCE_MEM_TYPE_MASK (3<<3) #define IORESOURCE_MEM_8BIT (0<<3) #define IORESOURCE_MEM_16BIT (1<<3) #define IORESOURCE_MEM_8AND16BIT (2<<3) #define IORESOURCE_MEM_32BIT (3<<3) #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) #define IORESOURCE_IO_FIXED (1<<1) #define IORESOURCE_IO_SPARSE (1<<2) /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* Use RAM image, not ROM BAR */ /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ #define IORESOURCE_PCI_EA_BEI (1<<5) /* BAR Equivalent Indicator */ /* * I/O Resource Descriptors * * Descriptors are used by walk_iomem_res_desc() and region_intersects() * for searching a specific resource range in the iomem table. Assign * a new descriptor when a resource range supports the search interfaces. * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). */ enum { IORES_DESC_NONE = 0, IORES_DESC_CRASH_KERNEL = 1, IORES_DESC_ACPI_TABLES = 2, IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, IORES_DESC_SOFT_RESERVED = 8, }; /* * Flags controlling ioremap() behavior. */ enum { IORES_MAP_SYSTEM_RAM = BIT(0), IORES_MAP_ENCRYPTED = BIT(1), }; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ DEFINE_RES_IO_NAMED((_start), (_size), NULL) #define DEFINE_RES_MEM_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_MEM) #define DEFINE_RES_MEM(_start, _size) \ DEFINE_RES_MEM_NAMED((_start), (_size), NULL) #define DEFINE_RES_IRQ_NAMED(_irq, _name) \ DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ) #define DEFINE_RES_IRQ(_irq) \ DEFINE_RES_IRQ_NAMED((_irq), NULL) #define DEFINE_RES_DMA_NAMED(_dma, _name) \ DEFINE_RES_NAMED((_dma), 1, (_name), IORESOURCE_DMA) #define DEFINE_RES_DMA(_dma) \ DEFINE_RES_DMA_NAMED((_dma), NULL) /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; extern struct resource *request_resource_conflict(struct resource *root, struct resource *new); extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); void release_child_resources(struct resource *new); extern void reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name); extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t (*alignf)(void *, const struct resource *, resource_size_t, resource_size_t), void *alignf_data); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; } static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } static inline unsigned long resource_ext_type(const struct resource *res) { return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET) return false; return r1->start <= r2->start && r1->end >= r2->end; } /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) #define request_mem_region_exclusive(start,n,name) \ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, resource_size_t n, const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) #define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE extern void release_mem_region_adjustable(resource_size_t, resource_size_t); #endif #ifdef CONFIG_MEMORY_HOTPLUG extern void merge_system_ram_resource(struct resource *res); #endif /* Wrappers for managed devices */ struct device; extern int devm_request_resource(struct device *dev, struct resource *root, struct resource *new); extern void devm_release_resource(struct device *dev, struct resource *new); #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ __devm_request_region(dev, &iomem_resource, (start), (n), (name)) extern struct resource * __devm_request_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n, const char *name); #define devm_release_region(dev, start, n) \ __devm_release_region(dev, &ioport_resource, (start), (n)) #define devm_release_mem_region(dev, start, n) \ __devm_release_region(dev, &iomem_resource, (start), (n)) extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int walk_mem_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) { return (r1->start <= r2->end && r1->end >= r2->start); } struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res); #else static inline void revoke_devmem(struct resource *res) { }; #endif #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 // SPDX-License-Identifier: GPL-2.0+ /* * ext4_jbd2.h * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * * Copyright 1998--1999 Red Hat corp --- All Rights Reserved * * Ext4-specific journaling extensions. */ #ifndef _EXT4_JBD2_H #define _EXT4_JBD2_H #include <linux/fs.h> #include <linux/jbd2.h> #include "ext4.h" #define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) /* Define the number of blocks we need to account to a transaction to * modify one block of data. * * We may have to touch one inode, one bitmap buffer, up to three * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. * * For extents-enabled fs we may have to allocate and modify up to * 5 levels of tree, data block (for each of these we need bitmap + group * summaries), root which is stored in the inode, sb */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ (ext4_has_feature_extents(sb) ? 20U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode * and the superblock, which are already accounted for. */ #define EXT4_XATTR_TRANS_BLOCKS 6U /* Define the minimum size for a transaction which modifies data. This * needs to take into account the fact that we may end up modifying two * quota files too (one for the group, one for the user quota). The * superblock only gets updated once, of course, so don't bother * counting that again for the quota updates. */ #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* * Define the number of metadata blocks we need to account to modify data. * * This include super block, inode block, quota blocks and xattr blocks */ #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Define an arbitrary limit for the amount of data we will anticipate * writing to any given transaction. For unbounded transactions such as * write(2) and truncate(2) we can write more than this, but we always * start off at the maximum transaction size and grow the transaction * optimistically as we go. */ #define EXT4_MAX_TRANS_DATA 64U /* We break up a large truncate or write transaction once the handle's * buffer credits gets this low, we need either to extend the * transaction or to start a new one. Reserve enough space here for * inode, bitmap, superblock, group and indirection updates for at least * one block, plus two quota updates. Quota allocations are not * needed. */ #define EXT4_RESERVE_TRANS_BLOCKS 12U /* * Number of credits needed if we need to insert an entry into a * directory. For each new index block, we need 4 blocks (old index * block, new index block, bitmap block, bg summary). For normal * htree directories there are 2 levels; if the largedir feature * enabled it's 3 levels. */ #define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ #define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) #define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) /* * Ext4 handle operation types -- for logging purposes */ #define EXT4_HT_MISC 0 #define EXT4_HT_INODE 1 #define EXT4_HT_WRITE_PAGE 2 #define EXT4_HT_MAP_BLOCKS 3 #define EXT4_HT_DIR 4 #define EXT4_HT_TRUNCATE 5 #define EXT4_HT_QUOTA 6 #define EXT4_HT_RESIZE 7 #define EXT4_HT_MIGRATE 8 #define EXT4_HT_MOVE_EXTENTS 9 #define EXT4_HT_XATTR 10 #define EXT4_HT_EXT_CONVERT 11 #define EXT4_HT_MAX 12 /** * struct ext4_journal_cb_entry - Base structure for callback information. * * This struct is a 'seed' structure for a using with your own callback * structs. If you are using callbacks you must allocate one of these * or another struct of your own definition which has this struct * as it's first element and pass it to ext4_journal_callback_add(). */ struct ext4_journal_cb_entry { /* list information for other callbacks attached to the same handle */ struct list_head jce_list; /* Function to call with this callback structure */ void (*jce_func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error); /* user data goes here */ }; /** * ext4_journal_callback_add: add a function to call after transaction commit * @handle: active journal transaction handle to register callback on * @func: callback function to call after the transaction has committed: * @sb: superblock of current filesystem for transaction * @jce: returned journal callback data * @rc: journal state at commit (0 = transaction committed properly) * @jce: journal callback data (internal and function private data struct) * * The registered function will be called in the context of the journal thread * after the transaction for which the handle was created has completed. * * No locks are held when the callback function is called, so it is safe to * call blocking functions from within the callback, but the callback should * not block or run for too long, or the filesystem will be blocked waiting for * the next transaction to commit. No journaling functions can be used, or * there is a risk of deadlock. * * There is no guaranteed calling order of multiple registered callbacks on * the same transaction. */ static inline void _ext4_journal_callback_add(handle_t *handle, struct ext4_journal_cb_entry *jce) { /* Add the jce to transaction's private list */ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); } static inline void ext4_journal_callback_add(handle_t *handle, void (*func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, int rc), struct ext4_journal_cb_entry *jce) { struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); /* Add the jce to transaction's private list */ jce->jce_func = func; spin_lock(&sbi->s_md_lock); _ext4_journal_callback_add(handle, jce); spin_unlock(&sbi->s_md_lock); } /** * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered * @jce: registered journal callback entry to unregister * Return true if object was successfully removed */ static inline bool ext4_journal_callback_try_del(handle_t *handle, struct ext4_journal_cb_entry *jce) { bool deleted; struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); spin_lock(&sbi->s_md_lock); deleted = !list_empty(&jce->jce_list); list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); return deleted; } int ext4_mark_iloc_dirty(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc); /* * On success, We end up with an outstanding reference count against * iloc->bh. This _must_ be cleaned up later. */ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc); #define ext4_mark_inode_dirty(__h, __i) \ __ext4_mark_inode_dirty((__h), (__i), __func__, __LINE__) int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, const char *func, unsigned int line); int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, struct ext4_iloc *iloc); /* * Wrapper functions with which ext4 calls into JBD. */ int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); int __ext4_journal_get_create_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh); int __ext4_handle_dirty_super(const char *where, unsigned int line, handle_t *handle, struct super_block *sb); #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \ (bh), (block_nr)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh)) #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) #define ext4_handle_dirty_super(handle, sb) \ __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) /* Note: Do not use this for NULL handles. This is only to determine if * a properly allocated handle is using a journal or not. */ static inline int ext4_handle_valid(handle_t *handle) { if ((unsigned long)handle < EXT4_NOJOURNAL_MAX_REF_COUNT) return 0; return 1; } static inline void ext4_handle_sync(handle_t *handle) { if (ext4_handle_valid(handle)) handle->h_sync = 1; } static inline int ext4_handle_is_aborted(handle_t *handle) { if (ext4_handle_valid(handle)) return is_handle_aborted(handle); return 0; } static inline int ext4_free_metadata_revoke_credits(struct super_block *sb, int blocks) { /* Freeing each metadata block can result in freeing one cluster */ return blocks * EXT4_SB(sb)->s_cluster_ratio; } static inline int ext4_trans_default_revoke_credits(struct super_block *sb) { return ext4_free_metadata_revoke_credits(sb, 8); } #define ext4_journal_start_sb(sb, type, nblocks) \ __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0, \ ext4_trans_default_revoke_credits(sb)) #define ext4_journal_start(inode, type, nblocks) \ __ext4_journal_start((inode), __LINE__, (type), (nblocks), 0, \ ext4_trans_default_revoke_credits((inode)->i_sb)) #define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\ __ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\ ext4_trans_default_revoke_credits((inode)->i_sb)) #define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \ __ext4_journal_start((inode), __LINE__, (type), (blocks), 0, \ (revoke_creds)) static inline handle_t *__ext4_journal_start(struct inode *inode, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds) { return __ext4_journal_start_sb(inode->i_sb, line, type, blocks, rsv_blocks, revoke_creds); } #define ext4_journal_stop(handle) \ __ext4_journal_stop(__func__, __LINE__, (handle)) #define ext4_journal_start_reserved(handle, type) \ __ext4_journal_start_reserved((handle), __LINE__, (type)) handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, int type); static inline handle_t *ext4_journal_current_handle(void) { return journal_current_handle(); } static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke) { if (ext4_handle_valid(handle)) return jbd2_journal_extend(handle, nblocks, revoke); return 0; } static inline int ext4_journal_restart(handle_t *handle, int nblocks, int revoke) { if (ext4_handle_valid(handle)) return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS); return 0; } int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, int extend_cred, int revoke_cred); /* * Ensure @handle has at least @check_creds credits available. If not, * transaction will be extended or restarted to contain at least @extend_cred * credits. Before restarting transaction @fn is executed to allow for cleanup * before the transaction is restarted. * * The return value is < 0 in case of error, 0 in case the handle has enough * credits or transaction extension succeeded, 1 in case transaction had to be * restarted. */ #define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred, \ revoke_cred, fn) \ ({ \ __label__ __ensure_end; \ int err = __ext4_journal_ensure_credits((handle), (check_cred), \ (extend_cred), (revoke_cred)); \ \ if (err <= 0) \ goto __ensure_end; \ err = (fn); \ if (err < 0) \ goto __ensure_end; \ err = ext4_journal_restart((handle), (extend_cred), (revoke_cred)); \ if (err == 0) \ err = 1; \ __ensure_end: \ err; \ }) /* * Ensure given handle has at least requested amount of credits available, * possibly restarting transaction if needed. We also make sure the transaction * has space for at least ext4_trans_default_revoke_credits(sb) revoke records * as freeing one or two blocks is very common pattern and requesting this is * very cheap. */ static inline int ext4_journal_ensure_credits(handle_t *handle, int credits, int revoke_creds) { return ext4_journal_ensure_credits_fn(handle, credits, credits, revoke_creds, 0); } static inline int ext4_journal_blocks_per_page(struct inode *inode) { if (EXT4_JOURNAL(inode) != NULL) return jbd2_journal_blocks_per_page(inode); return 0; } static inline int ext4_journal_force_commit(journal_t *journal) { if (journal) return jbd2_journal_force_commit(journal); return 0; } static inline int ext4_jbd2_inode_add_write(handle_t *handle, struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) return jbd2_journal_inode_ranged_write(handle, EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) return jbd2_journal_inode_ranged_wait(handle, EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline void ext4_update_inode_fsync_trans(handle_t *handle, struct inode *inode, int datasync) { struct ext4_inode_info *ei = EXT4_I(inode); if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; } } /* super.c */ int ext4_force_commit(struct super_block *sb); /* * Ext4 inode journal modes */ #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ int ext4_inode_journal_mode(struct inode *inode); static inline int ext4_should_journal_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; } static inline int ext4_should_order_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; } static inline int ext4_should_writeback_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; } static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks) { if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) return 0; if (!ext4_should_journal_data(inode)) return 0; /* * Data blocks in one extent are contiguous, just account for partial * clusters at extent boundaries */ return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1); } /* * This function controls whether or not we should try to go down the * dioread_nolock code paths, which makes it safe to avoid taking * i_mutex for direct I/O reads. This only works for extent-based * files, and it doesn't work if data journaling is enabled, since the * dioread_nolock code uses b_private to pass information back to the * I/O completion handler, and this conflicts with the jbd's use of * b_private. */ static inline int ext4_should_dioread_nolock(struct inode *inode) { if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) return 0; if (!S_ISREG(inode->i_mode)) return 0; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return 0; if (ext4_should_journal_data(inode)) return 0; /* temporary fix to prevent generic/422 test failures */ if (!test_opt(inode->i_sb, DELALLOC)) return 0; return 1; } #endif /* _EXT4_JBD2_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 /* SPDX-License-Identifier: GPL-2.0 */ /* * Access vector cache interface for object managers. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ #ifndef _SELINUX_AVC_H_ #define _SELINUX_AVC_H_ #include <linux/stddef.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/kdev_t.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/audit.h> #include <linux/lsm_audit.h> #include <linux/in6.h> #include "flask.h" #include "av_permissions.h" #include "security.h" /* * An entry in the AVC. */ struct avc_entry; struct task_struct; struct inode; struct sock; struct sk_buff; /* * AVC statistics */ struct avc_cache_stats { unsigned int lookups; unsigned int misses; unsigned int allocations; unsigned int reclaims; unsigned int frees; }; /* * We only need this data after we have decided to send an audit message. */ struct selinux_audit_data { u32 ssid; u32 tsid; u16 tclass; u32 requested; u32 audited; u32 denied; int result; struct selinux_state *state; }; /* * AVC operations */ void __init avc_init(void); static inline u32 avc_audit_required(u32 requested, struct av_decision *avd, int result, u32 auditdeny, u32 *deniedp) { u32 denied, audited; denied = requested & ~avd->allowed; if (unlikely(denied)) { audited = denied & avd->auditdeny; /* * auditdeny is TRICKY! Setting a bit in * this field means that ANY denials should NOT be audited if * the policy contains an explicit dontaudit rule for that * permission. Take notice that this is unrelated to the * actual permissions that were denied. As an example lets * assume: * * denied == READ * avd.auditdeny & ACCESS == 0 (not set means explicit rule) * auditdeny & ACCESS == 1 * * We will NOT audit the denial even though the denied * permission was READ and the auditdeny checks were for * ACCESS */ if (auditdeny && !(auditdeny & avd->auditdeny)) audited = 0; } else if (result) audited = denied = requested; else audited = requested & avd->auditallow; *deniedp = denied; return audited; } int slow_avc_audit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, u32 audited, u32 denied, int result, struct common_audit_data *a); /** * avc_audit - Audit the granting or denial of permissions. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions * @avd: access vector decisions * @result: result from avc_has_perm_noaudit * @a: auxiliary audit data * @flags: VFS walk flags * * Audit the granting or denial of permissions in accordance * with the policy. This function is typically called by * avc_has_perm() after a permission check, but can also be * called directly by callers who use avc_has_perm_noaudit() * in order to separate the permission check from the auditing. * For example, this separation is useful when the permission check must * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ static inline int avc_audit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct av_decision *avd, int result, struct common_audit_data *a, int flags) { u32 audited, denied; audited = avc_audit_required(requested, avd, result, 0, &denied); if (likely(!audited)) return 0; /* fall back to ref-walk if we have to generate audit */ if (flags & MAY_NOT_BLOCK) return -ECHILD; return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, a); } #define AVC_STRICT 1 /* Ignore permissive mode. */ #define AVC_EXTENDED_PERMS 2 /* update extended permissions */ #define AVC_NONBLOCKING 4 /* non blocking */ int avc_has_perm_noaudit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, struct av_decision *avd); int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata); int avc_has_perm_flags(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata, int flags); int avc_has_extended_perms(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, u8 driver, u8 perm, struct common_audit_data *ad); u32 avc_policy_seqno(struct selinux_state *state); #define AVC_CALLBACK_GRANT 1 #define AVC_CALLBACK_TRY_REVOKE 2 #define AVC_CALLBACK_REVOKE 4 #define AVC_CALLBACK_RESET 8 #define AVC_CALLBACK_AUDITALLOW_ENABLE 16 #define AVC_CALLBACK_AUDITALLOW_DISABLE 32 #define AVC_CALLBACK_AUDITDENY_ENABLE 64 #define AVC_CALLBACK_AUDITDENY_DISABLE 128 #define AVC_CALLBACK_ADD_XPERMS 256 int avc_add_callback(int (*callback)(u32 event), u32 events); /* Exported to selinuxfs */ struct selinux_avc; int avc_get_hash_stats(struct selinux_avc *avc, char *page); unsigned int avc_get_cache_threshold(struct selinux_avc *avc); void avc_set_cache_threshold(struct selinux_avc *avc, unsigned int cache_threshold); /* Attempt to free avc node cache */ void avc_disable(void); #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats); #endif #endif /* _SELINUX_AVC_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. NET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Ethernet handlers. * * Version: @(#)eth.h 1.0.4 05/13/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Relocated to include/linux where it belongs by Alan Cox * <gw4pts@gw4pts.ampr.org> */ #ifndef _LINUX_ETHERDEVICE_H #define _LINUX_ETHERDEVICE_H #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/crc32.h> #include <asm/unaligned.h> #include <asm/bitsperlong.h> #ifdef __KERNEL__ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); __be16 eth_header_parse_protocol(const struct sk_buff *skb); int eth_prepare_mac_addr_change(struct net_device *dev, void *p); void eth_commit_mac_addr_change(struct net_device *dev, void *p); int eth_mac_addr(struct net_device *dev, void *p); int eth_validate_addr(struct net_device *dev); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs); #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, unsigned int txqs, unsigned int rxqs); #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1) struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb); int eth_gro_complete(struct sk_buff *skb, int nhoff); /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; #define eth_stp_addr eth_reserved_addr_base /** * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. */ static inline bool is_link_local_ether_addr(const u8 *addr) { __be16 *a = (__be16 *)addr; static const __be16 *b = (const __be16 *)eth_reserved_addr_base; static const __be16 m = cpu_to_be16(0xfff0); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return (((*(const u32 *)addr) ^ (*(const u32 *)b)) | (__force int)((a[2] ^ b[2]) & m)) == 0; #else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; #endif } /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ static inline bool is_zero_ether_addr(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return ((*(const u32 *)addr) | (*(const u16 *)(addr + 4))) == 0; #else return (*(const u16 *)(addr + 0) | *(const u16 *)(addr + 2) | *(const u16 *)(addr + 4)) == 0; #endif } /** * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) u32 a = *(const u32 *)addr; #else u16 a = *(const u16 *)addr; #endif #ifdef __BIG_ENDIAN return 0x01 & (a >> ((sizeof(a) * 8) - 8)); #else return 0x01 & a; #endif } static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN return 0x01 & ((*(const u64 *)addr) >> 56); #else return 0x01 & (*(const u64 *)addr); #endif #else return is_multicast_ether_addr(addr); #endif } /** * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { return 0x02 & addr[0]; } /** * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ static inline bool is_broadcast_ether_addr(const u8 *addr) { return (*(const u16 *)(addr + 0) & *(const u16 *)(addr + 2) & *(const u16 *)(addr + 4)) == 0xffff; } /** * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a unicast address. */ static inline bool is_unicast_ether_addr(const u8 *addr) { return !is_multicast_ether_addr(addr); } /** * is_valid_ether_addr - Determine if the given Ethernet address is valid * @addr: Pointer to a six-byte array containing the Ethernet address * * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * * Return true if the address is valid. * * Please note: addr must be aligned to u16. */ static inline bool is_valid_ether_addr(const u8 *addr) { /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to * explicitly check for it here. */ return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } /** * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol * @proto: Ethertype/length value to be tested * * Check that the value from the Ethertype/length field is a valid Ethertype. * * Return true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { #ifndef __BIG_ENDIAN /* if CPU is little endian mask off bits representing LSB */ proto &= htons(0xFF00); #endif /* cast both to u16 and compare since LSB can be ignored */ return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); } /** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address * * Generate a random Ethernet address (MAC) that is not multicast * and has the local assigned bit set. */ static inline void eth_random_addr(u8 *addr) { get_random_bytes(addr, ETH_ALEN); addr[0] &= 0xfe; /* clear multicast bit */ addr[0] |= 0x02; /* set local assignment bit (IEEE802) */ } #define random_ether_addr(addr) eth_random_addr(addr) /** * eth_broadcast_addr - Assign broadcast address * @addr: Pointer to a six-byte array containing the Ethernet address * * Assign the broadcast address to the given address array. */ static inline void eth_broadcast_addr(u8 *addr) { memset(addr, 0xff, ETH_ALEN); } /** * eth_zero_addr - Assign zero address * @addr: Pointer to a six-byte array containing the Ethernet address * * Assign the zero address to the given address array. */ static inline void eth_zero_addr(u8 *addr) { memset(addr, 0x00, ETH_ALEN); } /** * eth_hw_addr_random - Generate software assigned random Ethernet and * set device flag * @dev: pointer to net_device structure * * Generate a random Ethernet address (MAC) to be used by a net device * and set addr_assign_type so the state can be read by sysfs and be * used by userspace. */ static inline void eth_hw_addr_random(struct net_device *dev) { dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->dev_addr); } /** * eth_hw_addr_crc - Calculate CRC from netdev_hw_addr * @ha: pointer to hardware address * * Calculate CRC from a hardware address as basis for filter hashes. */ static inline u32 eth_hw_addr_crc(struct netdev_hw_addr *ha) { return ether_crc(ETH_ALEN, ha->addr); } /** * ether_addr_copy - Copy an Ethernet address * @dst: Pointer to a six-byte array Ethernet address destination * @src: Pointer to a six-byte array Ethernet address source * * Please note: dst & src must both be aligned to u16. */ static inline void ether_addr_copy(u8 *dst, const u8 *src) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) *(u32 *)dst = *(const u32 *)src; *(u16 *)(dst + 4) = *(const u16 *)(src + 4); #else u16 *a = (u16 *)dst; const u16 *b = (const u16 *)src; a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; #endif } /** * eth_hw_addr_inherit - Copy dev_addr from another net_device * @dst: pointer to net_device to copy dev_addr to * @src: pointer to net_device to copy dev_addr from * * Copy the Ethernet address from one net_device to another along with * the address attributes (addr_assign_type). */ static inline void eth_hw_addr_inherit(struct net_device *dst, struct net_device *src) { dst->addr_assign_type = src->addr_assign_type; ether_addr_copy(dst->dev_addr, src->dev_addr); } /** * ether_addr_equal - Compare two Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address * * Compare two Ethernet addresses, returns true if equal * * Please note: addr1 & addr2 must both be aligned to u16. */ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); return fold == 0; #else const u16 *a = (const u16 *)addr1; const u16 *b = (const u16 *)addr2; return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } /** * ether_addr_equal_64bits - Compare two Ethernet addresses * @addr1: Pointer to an array of 8 bytes * @addr2: Pointer to an other array of 8 bytes * * Compare two Ethernet addresses, returns true if equal, false otherwise. * * The function doesn't need any conditional branches and possibly uses * word memory accesses on CPU allowing cheap unaligned memory reads. * arrays = { byte1, byte2, byte3, byte4, byte5, byte6, pad1, pad2 } * * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. */ static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], const u8 addr2[6+2]) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); #ifdef __BIG_ENDIAN return (fold >> 16) == 0; #else return (fold << 16) == 0; #endif #else return ether_addr_equal(addr1, addr2); #endif } /** * ether_addr_equal_unaligned - Compare two not u16 aligned Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address * * Compare two Ethernet addresses, returns true if equal * * Please note: Use only when any Ethernet address may not be u16 aligned. */ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return ether_addr_equal(addr1, addr2); #else return memcmp(addr1, addr2, ETH_ALEN) == 0; #endif } /** * ether_addr_equal_masked - Compare two Ethernet addresses with a mask * @addr1: Pointer to a six-byte array containing the 1st Ethernet address * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address * @mask: Pointer to a six-byte array containing the Ethernet address bitmask * * Compare two Ethernet addresses with a mask, returns true if for every bit * set in the bitmask the equivalent bits in the ethernet addresses are equal. * Using a mask with all bits set is a slower ether_addr_equal. */ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, const u8 *mask) { int i; for (i = 0; i < ETH_ALEN; i++) { if ((addr1[i] ^ addr2[i]) & mask[i]) return false; } return true; } /** * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { u64 u = 0; int i; for (i = 0; i < ETH_ALEN; i++) u = u << 8 | addr[i]; return u; } /** * u64_to_ether_addr - Convert a u64 to an Ethernet address. * @u: u64 to convert to an Ethernet MAC address * @addr: Pointer to a six-byte array to contain the Ethernet address */ static inline void u64_to_ether_addr(u64 u, u8 *addr) { int i; for (i = ETH_ALEN - 1; i >= 0; i--) { addr[i] = u & 0xff; u = u >> 8; } } /** * eth_addr_dec - Decrement the given MAC address * * @addr: Pointer to a six-byte array containing Ethernet address to decrement */ static inline void eth_addr_dec(u8 *addr) { u64 u = ether_addr_to_u64(addr); u--; u64_to_ether_addr(u, addr); } /** * eth_addr_inc() - Increment the given MAC address. * @addr: Pointer to a six-byte array containing Ethernet address to increment. */ static inline void eth_addr_inc(u8 *addr) { u64 u = ether_addr_to_u64(addr); u++; u64_to_ether_addr(u, addr); } /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure * @addr: Pointer to a six-byte array containing the Ethernet address * * Compare passed address with all addresses of the device. Return true if the * address if one of the device addresses. * * Note that this function calls ether_addr_equal_64bits() so take care of * the right padding. */ static inline bool is_etherdev_addr(const struct net_device *dev, const u8 addr[6 + 2]) { struct netdev_hw_addr *ha; bool res = false; rcu_read_lock(); for_each_dev_addr(dev, ha) { res = ether_addr_equal_64bits(addr, ha->addr); if (res) break; } rcu_read_unlock(); return res; } #endif /* __KERNEL__ */ /** * compare_ether_header - Compare two Ethernet headers * @a: Pointer to Ethernet header * @b: Pointer to Ethernet header * * Compare two Ethernet headers, returns 0 if equal. * This assumes that the network header (i.e., IP header) is 4-byte * aligned OR the platform can handle unaligned access. This is the * case for all packets coming into netif_receive_skb or similar * entry points. */ static inline unsigned long compare_ether_header(const void *a, const void *b) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 unsigned long fold; /* * We want to compare 14 bytes: * [a0 ... a13] ^ [b0 ... b13] * Use two long XOR, ORed together, with an overlap of two bytes. * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] * This means the [a6 a7] ^ [b6 b7] part is done two times. */ fold = *(unsigned long *)a ^ *(unsigned long *)b; fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); return fold; #else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); #endif } /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. This function * takes short frames and pads them with zeros up to the 60 byte limit. */ static inline int eth_skb_pad(struct sk_buff *skb) { return skb_put_padto(skb, ETH_ZLEN); } #endif /* _LINUX_ETHERDEVICE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> /* * Routines for handling mm_structs */ extern struct mm_struct *mm_alloc(void); /** * mmgrab() - Pin a &struct mm_struct. * @mm: The &struct mm_struct to pin. * * Make sure that @mm will not get freed even after the owning task * exits. This doesn't guarantee that the associated address space * will still exist later on and mmget_not_zero() has to be used before * accessing it. * * This is a preferred way to pin @mm for a longer/unbounded amount * of time. * * Use mmdrop() to release the reference acquired by mmgrab(). * * See also <Documentation/vm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmgrab(struct mm_struct *mm) { atomic_inc(&mm->mm_count); } extern void __mmdrop(struct mm_struct *mm); static inline void mmdrop(struct mm_struct *mm) { /* * The implicit full barrier implied by atomic_dec_and_test() is * required by the membarrier system call before returning to * user-space, after storing to rq->curr. */ if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. * * Make sure that the address space of the given &struct mm_struct doesn't * go away. This does not protect against parts of the address space being * modified or freed, however. * * Never use this function to pin this address space for an * unbounded/indefinite amount of time. * * Use mmput() to release the reference acquired by mmget(). * * See also <Documentation/vm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmget(struct mm_struct *mm) { atomic_inc(&mm->mm_users); } static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); } /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); #ifdef CONFIG_MMU /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ void mmput_async(struct mm_struct *); #endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); /* * Grab a reference to a task's mm, if it is not already going away * and ptrace_may_access with the mode parameter passed to it * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct on exit() */ extern void exit_mm_release(struct task_struct *, struct mm_struct *); /* Remove the current tasks stale references to the old mm_struct on exec() */ extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) { bool ret; /* * need RCU to access ->real_parent if CLONE_VM was used along with * CLONE_PARENT. * * We check real_parent->mm == tsk->mm because CLONE_VFORK does not * imply CLONE_VM * * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus * ->real_parent is not necessarily the task doing vfork(), so in * theory we can't rely on task_lock() if we want to dereference it. * * And in this case we can't trust the real_parent->mm == tsk->mm * check, it can be false negative. But we do not care, if init or * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); ret = tsk->vfork_done && rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; } /* * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence */ if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; } return flags; } #ifdef CONFIG_LOCKDEP extern void __fs_reclaim_acquire(void); extern void __fs_reclaim_release(void); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else static inline void __fs_reclaim_acquire(void) { } static inline void __fs_reclaim_release(void) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. * All further allocations will implicitly drop __GFP_IO flag and so * they are safe for the IO critical section from the allocation recursion * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * * This function is safe to be used from any context. */ static inline unsigned int memalloc_noio_save(void) { unsigned int flags = current->flags & PF_MEMALLOC_NOIO; current->flags |= PF_MEMALLOC_NOIO; return flags; } /** * memalloc_noio_restore - Ends the implicit GFP_NOIO scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_noio_save call. */ static inline void memalloc_noio_restore(unsigned int flags) { current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } /** * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope. * * This functions marks the beginning of the GFP_NOFS allocation scope. * All further allocations will implicitly drop __GFP_FS flag and so * they are safe for the FS critical section from the allocation recursion * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * * This function is safe to be used from any context. */ static inline unsigned int memalloc_nofs_save(void) { unsigned int flags = current->flags & PF_MEMALLOC_NOFS; current->flags |= PF_MEMALLOC_NOFS; return flags; } /** * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_nofs_save call. */ static inline void memalloc_nofs_restore(unsigned int flags) { current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; } static inline unsigned int memalloc_noreclaim_save(void) { unsigned int flags = current->flags & PF_MEMALLOC; current->flags |= PF_MEMALLOC; return flags; } static inline void memalloc_noreclaim_restore(unsigned int flags) { current->flags = (current->flags & ~PF_MEMALLOC) | flags; } #ifdef CONFIG_CMA static inline unsigned int memalloc_nocma_save(void) { unsigned int flags = current->flags & PF_MEMALLOC_NOCMA; current->flags |= PF_MEMALLOC_NOCMA; return flags; } static inline void memalloc_nocma_restore(unsigned int flags) { current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags; } #else static inline unsigned int memalloc_nocma_save(void) { return 0; } static inline void memalloc_nocma_restore(unsigned int flags) { } #endif #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { struct mem_cgroup *old; if (in_interrupt()) { old = this_cpu_read(int_active_memcg); this_cpu_write(int_active_memcg, memcg); } else { old = current->active_memcg; current->active_memcg = memcg; } return old; } #else static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { return NULL; } #endif #ifdef CONFIG_MEMBARRIER enum { MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1), MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2), MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS #include <asm/membarrier.h> #endif static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) return; sync_core_before_usermode(); } extern void membarrier_exec_mmap(struct mm_struct *mm); #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } #endif static inline void membarrier_exec_mmap(struct mm_struct *mm) { } static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } #endif #endif /* _LINUX_SCHED_MM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch. Keys that are * initially true or false can be using in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) #ifdef CONFIG_JUMP_LABEL struct static_key { atomic_t enabled; /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified. * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; }; #else struct static_key { atomic_t enabled; }; #endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry) { entry->key |= 2; } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static inline int static_key_count(struct static_key *key) { return atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely(static_key_count(key) > 0)) return true; return false; } static inline void static_key_slow_inc(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline int jump_label_apply_nops(struct module *mod) { return 0; } static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. */ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). */ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/writeback.h */ #ifndef WRITEBACK_H #define WRITEBACK_H #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/fs.h> #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> #include <linux/blk-cgroup.h> struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* * The 1/4 region under the global dirty thresh is for smooth dirty throttling: * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) struct backing_dev_info; /* * fs/fs-writeback.c */ enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ }; /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised * in a manner such that unspecified fields are set to zero. */ struct writeback_control { long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ /* * For a_ops->writepages(): if start or end are non-zero then this is * a hint that the filesystem need only write out the pages inside that * byterange. The byte at `end' is included in the writeout request. */ loff_t range_start; loff_t range_end; enum writeback_sync_modes sync_mode; unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ /* * When writeback IOs are bounced through async layers, only the * initial synchronous phase should be accounted towards inode * cgroup ownership arbitration to avoid confusion. Later stages * can set the following flag to disable the accounting. */ unsigned no_cgroup_owner:1; unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ /* foreign inode detection, see wbc_detach_inode() */ int wb_id; /* current wb id */ int wb_lcand_id; /* last foreign candidate wb id */ int wb_tcand_id; /* this foreign candidate wb id */ size_t wb_bytes; /* bytes written by current wb */ size_t wb_lcand_bytes; /* bytes written by last candidate */ size_t wb_tcand_bytes; /* bytes written by this candidate */ #endif }; static inline int wbc_to_write_flags(struct writeback_control *wbc) { int flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; if (wbc->sync_mode == WB_SYNC_ALL) flags |= REQ_SYNC; else if (wbc->for_kupdate || wbc->for_background) flags |= REQ_BACKGROUND; return flags; } static inline struct cgroup_subsys_state * wbc_blkcg_css(struct writeback_control *wbc) { #ifdef CONFIG_CGROUP_WRITEBACK if (wbc->wb) return wbc->wb->blkcg_css; #endif return blkcg_root_css; } /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global * domain, global_wb_domain, that every wb in the system is a member of. * This allows measuring the relative bandwidth of each wb to distribute * dirtyable memory accordingly. */ struct wb_domain { spinlock_t lock; /* * Scale the writeback cache size proportional to the relative * writeout speed. * * We do this by keeping a floating proportion between BDIs, based * on page writeback completions [end_page_writeback()]. Those * devices that write out pages fastest will get the larger share, * while the slower will get a smaller share. * * We use page writeout completions because we are interested in * getting rid of dirty pages. Having them written out is the * primary goal. * * We introduce a concept of time, a period over which we measure * these events, because demand can/will vary over time. The length * of this period itself is measured in page writeback completions. */ struct fprop_global completions; struct timer_list period_timer; /* timer for aging of completions */ unsigned long period_time; /* * The dirtyable memory and dirty threshold could be suddenly * knocked down by a large amount (eg. on the startup of KVM in a * swapless system). This may throw the system into deep dirty * exceeded state and throttle heavy/light dirtiers alike. To * retain good responsiveness, maintain global_dirty_limit for * tracking slowly down to the knocked down dirty threshold. * * Both fields are protected by ->lock. */ unsigned long dirty_limit_tstamp; unsigned long dirty_limit; }; /** * wb_domain_size_changed - memory available to a wb_domain has changed * @dom: wb_domain of interest * * This function should be called when the amount of memory available to * @dom has changed. It resets @dom's dirty limit parameters to prevent * the past values which don't match the current configuration from skewing * dirty throttling. Without this, when memory size of a wb_domain is * greatly reduced, the dirty throttling logic may allow too many pages to * be dirtied leading to consecutive unnecessary OOMs and may get stuck in * that situation. */ static inline void wb_domain_size_changed(struct wb_domain *dom) { spin_lock(&dom->lock); dom->dirty_limit_tstamp = jiffies; dom->dirty_limit = 0; spin_unlock(&dom->lock); } /* * fs/fs-writeback.c */ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { might_sleep(); wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } #ifdef CONFIG_CGROUP_WRITEBACK #include <linux/cgroup.h> #include <linux/bio.h> void __inode_attach_wb(struct inode *inode, struct page *page); void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest * @page: page being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o * @inode->i_lock. */ static inline void inode_attach_wb(struct inode *inode, struct page *page) { if (!inode->i_wb) __inode_attach_wb(inode, page); } /** * inode_detach_wb - disassociate an inode from its wb * @inode: inode of interest * * @inode is being freed. Detach from its wb. */ static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } } /** * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite * @wbc: writeback_control of interest * @inode: target inode * * This function is to be used by __filemap_fdatawrite_range(), which is an * alternative entry point into writeback code, and first ensures @inode is * associated with a bdi_writeback and attaches it to @wbc. */ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { spin_lock(&inode->i_lock); inode_attach_wb(inode, NULL); wbc_attach_and_unlock_inode(wbc, inode); } /** * wbc_init_bio - writeback specific initializtion of bio * @wbc: writeback_control for the writeback in progress * @bio: bio to be initialized * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup * writeback context. Must be called after the bio has been associated with * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (wbc->wb) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct page *page) { } static inline void inode_detach_wb(struct inode *inode) { } static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock) { spin_unlock(&inode->i_lock); } static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { } static inline void wbc_detach_inode(struct writeback_control *wbc) { } static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes) { } static inline void cgroup_writeback_umount(void) { } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * mm/page-writeback.c */ #ifdef CONFIG_BLOCK void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_sync(struct work_struct *work); void laptop_mode_timer_fn(struct timer_list *t); #else static inline void laptop_sync_completion(void) { } #endif bool node_dirty_ok(struct pglist_data *pgdat); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); #ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom); #endif extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ extern int dirty_background_ratio; extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; int dirty_background_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int dirty_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); void balance_dirty_pages_ratelimited(struct address_space *mapping); bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); void account_page_redirty(struct page *page); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); #endif /* WRITEBACK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CGROUP_H #define _LINUX_CGROUP_H /* * cgroup interface * * Copyright (C) 2003 BULL SA * Copyright (C) 2004-2006 Silicon Graphics, Inc. * */ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/kernfs.h> #include <linux/jump_label.h> #include <linux/types.h> #include <linux/ns_common.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/kernel_stat.h> #include <linux/cgroup-defs.h> struct kernel_clone_args; #ifdef CONFIG_CGROUPS /* * All weight knobs on the default hierarhcy should use the following min, * default and max values. The default value is the logarithmic center of * MIN and MAX and allows 100x to be expressed in both directions. */ #define CGROUP_WEIGHT_MIN 1 #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 /* walk only threadgroup leaders */ #define CSS_TASK_ITER_PROCS (1U << 0) /* walk all threaded css_sets in the domain */ #define CSS_TASK_ITER_THREADED (1U << 1) /* internal flags */ #define CSS_TASK_ITER_SKIPPED (1U << 16) /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; unsigned int flags; struct list_head *cset_pos; struct list_head *cset_head; struct list_head *tcset_pos; struct list_head *tcset_head; struct list_head *task_pos; struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> #undef SUBSYS #define SUBSYS(_x) \ extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; #include <linux/cgroup_subsys.h> #undef SUBSYS /** * cgroup_subsys_enabled - fast test on whether a subsys is enabled * @ss: subsystem in question */ #define cgroup_subsys_enabled(ss) \ static_branch_likely(&ss ## _enabled_key) /** * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy * @ss: subsystem in question */ #define cgroup_subsys_on_dfl(ss) \ static_branch_likely(&ss ## _on_dfl_key) bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); extern int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs); void cgroup_exit(struct task_struct *p); void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v); /* * Iteration helpers and macros. */ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent); struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it); struct task_struct *css_task_iter_next(struct css_task_iter *it); void css_task_iter_end(struct css_task_iter *it); /** * css_for_each_child - iterate through children of a css * @pos: the css * to use as the loop cursor * @parent: css whose children to walk * * Walk @parent's children. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_child(pos, parent) \ for ((pos) = css_next_child(NULL, (parent)); (pos); \ (pos) = css_next_child((pos), (parent))) /** * css_for_each_descendant_pre - pre-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @root: css whose descendants to walk * * Walk @root's descendants. @root is included in the iteration and the * first node to be visited. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * For example, the following guarantees that a descendant can't escape * state updates of its ancestors. * * my_online(@css) * { * Lock @css's parent and @css; * Inherit state from the parent; * Unlock both. * } * * my_update_state(@css) * { * css_for_each_descendant_pre(@pos, @css) { * Lock @pos; * if (@pos == @css) * Update @css's state; * else * Verify @pos is alive and inherit state from its parent; * Unlock @pos; * } * } * * As long as the inheriting step, including checking the parent state, is * enclosed inside @pos locking, double-locking the parent isn't necessary * while inheriting. The state update to the parent is guaranteed to be * visible by walking order and, as long as inheriting operations to the * same @pos are atomic to each other, multiple updates racing each other * still result in the correct state. It's guaranateed that at least one * inheritance happens for any css after the latest update to its parent. * * If checking parent's state requires locking the parent, each inheriting * iteration should lock and unlock both @pos->parent and @pos. * * Alternatively, a subsystem may choose to use a single global lock to * synchronize ->css_online() and ->css_offline() against tree-walking * operations. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_descendant_pre(pos, css) \ for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ (pos) = css_next_descendant_pre((pos), (css))) /** * css_for_each_descendant_post - post-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @css: css whose descendants to walk * * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. @root is included in the iteration and the last * node to be visited. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * Note that the walk visibility guarantee example described in pre-order * walk doesn't apply the same to post-order walks. */ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple * processes. * * On the v2 hierarchy, there may be tasks from multiple processes and they * may not share the source or destination csses. * * On traditional hierarchies, when there are multiple tasks in @tset, if a * task of a process is in @tset, all tasks of the process are in @tset. * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, dst_css, tset) \ for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ (task); \ (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. */ #define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ (leader); \ (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else /* * Inline functions. */ static inline u64 cgroup_id(struct cgroup *cgrp) { return cgrp->kn->id; } /** * css_get - obtain a reference on the specified css * @css: target css * * The caller must already have a reference. */ static inline void css_get(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) percpu_ref_get(&css->refcnt); } /** * css_get_many - obtain references on the specified css * @css: target css * @n: number of references to get * * The caller must already have a reference. */ static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) { if (!(css->flags & CSS_NO_REF)) percpu_ref_get_many(&css->refcnt, n); } /** * css_tryget - try to obtain a reference on the specified css * @css: target css * * Obtain a reference on @css unless it already has reached zero and is * being released. This function doesn't care whether @css is on or * offline. The caller naturally needs to ensure that @css is accessible * but doesn't have to be holding a reference on it - IOW, RCU protected * access is good enough for this function. Returns %true if a reference * count was successfully obtained; %false otherwise. */ static inline bool css_tryget(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) return percpu_ref_tryget(&css->refcnt); return true; } /** * css_tryget_online - try to obtain a reference on the specified css if online * @css: target css * * Obtain a reference on @css if it's online. The caller naturally needs * to ensure that @css is accessible but doesn't have to be holding a * reference on it - IOW, RCU protected access is good enough for this * function. Returns %true if a reference count was successfully obtained; * %false otherwise. */ static inline bool css_tryget_online(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) return percpu_ref_tryget_live(&css->refcnt); return true; } /** * css_is_dying - test whether the specified css is dying * @css: target css * * Test whether @css is in the process of offlining or already offline. In * most cases, ->css_online() and ->css_offline() callbacks should be * enough; however, the actual offline operations are RCU delayed and this * test returns %true also when @css is scheduled to be offlined. * * This is useful, for example, when the use case requires synchronous * behavior with respect to cgroup removal. cgroup removal schedules css * offlining but the css can seem alive while the operation is being * delayed. If the delay affects user visible semantics, this test can be * used to resolve the situation. */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } /** * css_put - put a css reference * @css: target css * * Put a reference obtained via css_get() and css_tryget_online(). */ static inline void css_put(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_NO_REF)) percpu_ref_put(&css->refcnt); } /** * css_put_many - put css references * @css: target css * @n: number of references to put * * Put references obtained via css_get() and css_tryget_online(). */ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) { if (!(css->flags & CSS_NO_REF)) percpu_ref_put_many(&css->refcnt, n); } static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); } static inline bool cgroup_tryget(struct cgroup *cgrp) { return css_tryget(&cgrp->self); } static inline void cgroup_put(struct cgroup *cgrp) { css_put(&cgrp->self); } /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for * @__c: extra condition expression to be passed to rcu_dereference_check() * * A task's css_set is RCU protected, initialized and exited while holding * task_lock(), and can only be modified while holding both cgroup_mutex * and task_lock() while the task is alive. This macro verifies that the * caller is inside proper critical section and returns @task's css_set. * * The caller can also specify additional allowed conditions via @__c, such * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ rcu_dereference((task)->cgroups) #endif /** * task_css_check - obtain css for (task, subsys) w/ extra access conds * @task: the target task * @subsys_id: the target subsystem ID * @__c: extra condition expression to be passed to rcu_dereference_check() * * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The * synchronization rules are the same as task_css_set_check(). */ #define task_css_check(task, subsys_id, __c) \ task_css_set_check((task), (__c))->subsys[(subsys_id)] /** * task_css_set - obtain a task's css_set * @task: the task to obtain css_set for * * See task_css_set_check(). */ static inline struct css_set *task_css_set(struct task_struct *task) { return task_css_set_check(task, false); } /** * task_css - obtain css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * See task_css_check(). */ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, false); } /** * task_get_css - find and get the css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * Find the css for the (@task, @subsys_id) combination, increment a * reference on and return it. This function is guaranteed to return a * valid css. The returned css may already have been offlined. */ static inline struct cgroup_subsys_state * task_get_css(struct task_struct *task, int subsys_id) { struct cgroup_subsys_state *css; rcu_read_lock(); while (true) { css = task_css(task, subsys_id); /* * Can't use css_tryget_online() here. A task which has * PF_EXITING set may stay associated with an offline css. * If such task calls this function, css_tryget_online() * will keep failing. */ if (likely(css_tryget(css))) break; cpu_relax(); } rcu_read_unlock(); return css; } /** * task_css_is_root - test whether a task belongs to the root css * @task: the target task * @subsys_id: the target subsystem ID * * Test whether @task belongs to the root css on the specified subsystem. * May be invoked in any context. */ static inline bool task_css_is_root(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, true) == init_css_set.subsys[subsys_id]; } static inline struct cgroup *task_cgroup(struct task_struct *task, int subsys_id) { return task_css(task, subsys_id)->cgroup; } static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) { return task_css_set(task)->dfl_cgrp; } static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { struct cgroup_subsys_state *parent_css = cgrp->self.parent; if (parent_css) return container_of(parent_css, struct cgroup, self); return NULL; } /** * cgroup_is_descendant - test ancestry * @cgrp: the cgroup to be tested * @ancestor: possible ancestor of @cgrp * * Test whether @cgrp is a descendant of @ancestor. It also returns %true * if @cgrp == @ancestor. This function is safe to call as long as @cgrp * and @ancestor are accessible. */ static inline bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); } /** * cgroup_ancestor - find ancestor of cgroup * @cgrp: cgroup to find ancestor of * @ancestor_level: level of ancestor to find starting from root * * Find ancestor of cgroup at specified level starting from root if it exists * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at * @ancestor_level. * * This function is safe to call as long as @cgrp is accessible. */ static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { if (cgrp->level < ancestor_level) return NULL; while (cgrp && cgrp->level > ancestor_level) cgrp = cgroup_parent(cgrp); return cgrp; } /** * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. * It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy. */ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { struct css_set *cset = task_css_set(task); return cgroup_is_descendant(cset->dfl_cgrp, ancestor); } /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + cgrp->nr_populated_threaded_children; } /* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { return kernfs_ino(cgrp->kn); } /* cft/css accessors for cftype->write() operation */ static inline struct cftype *of_cft(struct kernfs_open_file *of) { return of->kn->priv; } struct cgroup_subsys_state *of_css(struct kernfs_open_file *of); /* cft/css accessors for cftype->seq_*() operations */ static inline struct cftype *seq_cft(struct seq_file *seq) { return of_cft(seq->private); } static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) { return of_css(seq->private); } /* * Name / path handling functions. All are thin wrappers around the kernfs * counterparts and can be called under any context. */ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_name(cgrp->kn, buf, buflen); } static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_path(cgrp->kn, buf, buflen); } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) { pr_cont_kernfs_name(cgrp->kn); } static inline void pr_cont_cgroup_path(struct cgroup *cgrp) { pr_cont_kernfs_path(cgrp->kn); } static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { return &cgrp->psi; } static inline void cgroup_init_kthreadd(void) { /* * kthreadd is inherited by all kthreads, keep it in the root so * that the new kthreads are guaranteed to stay in the root until * initialization is finished. */ current->no_cgroup_migration = 1; } static inline void cgroup_kthread_ready(void) { /* * This kthread finished initialization. The creator should have * set PF_NO_SETAFFINITY if this kthread should stay in the root. */ current->no_cgroup_migration = 0; } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; struct cgroup; static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} static inline int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs) { return 0; } static inline void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { return NULL; } static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { return NULL; } static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { return true; } static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS /* * cgroup scalable recursive statistics. */ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); /* * Basic resource stats. */ #ifdef CONFIG_CGROUP_CPUACCT void cpuacct_charge(struct task_struct *tsk, u64 cputime); void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); #else static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} static inline void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) {} #endif void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec); static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) { struct cgroup *cgrp; cpuacct_charge(task, delta_exec); rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup *cgrp; cpuacct_account_field(task, index, delta_exec); rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) {} static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) {} #endif /* CONFIG_CGROUPS */ /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. */ #ifdef CONFIG_SOCK_CGROUP_DATA #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) extern spinlock_t cgroup_sk_update_lock; #endif void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) unsigned long v; /* * @skcd->val is 64bit but the following is safe on 32bit too as we * just need the lower ulong to be written and read atomically. */ v = READ_ONCE(skcd->val); if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; #else return (struct cgroup *)(unsigned long)skcd->val; #endif } #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; struct css_set *root_cset; }; extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } static inline struct cgroup_namespace * copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { return old_ns; } #endif /* !CONFIG_CGROUPS */ static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) refcount_inc(&ns->count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { if (ns && refcount_dec_and_test(&ns->count)) free_cgroup_ns(ns); } #ifdef CONFIG_CGROUPS void cgroup_enter_frozen(void); void cgroup_leave_frozen(bool always_leave); void cgroup_update_frozen(struct cgroup *cgrp); void cgroup_freeze(struct cgroup *cgrp, bool freeze); void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, struct cgroup *dst); static inline bool cgroup_task_freeze(struct task_struct *task) { bool ret; if (task->flags & PF_KTHREAD) return false; rcu_read_lock(); ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags); rcu_read_unlock(); return ret; } static inline bool cgroup_task_frozen(struct task_struct *task) { return task->frozen; } #else /* !CONFIG_CGROUPS */ static inline void cgroup_enter_frozen(void) { } static inline void cgroup_leave_frozen(bool always_leave) { } static inline bool cgroup_task_freeze(struct task_struct *task) { return false; } static inline bool cgroup_task_frozen(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUP_BPF static inline void cgroup_bpf_get(struct cgroup *cgrp) { percpu_ref_get(&cgrp->bpf.refcnt); } static inline void cgroup_bpf_put(struct cgroup *cgrp) { percpu_ref_put(&cgrp->bpf.refcnt); } #else /* CONFIG_CGROUP_BPF */ static inline void cgroup_bpf_get(struct cgroup *cgrp) {} static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ #endif /* _LINUX_CGROUP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/pagevec.h * * In many places it is efficient to batch an operation up against multiple * pages. A pagevec is a multipage container which is used for that. */ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H #include <linux/xarray.h> /* 15 pointers + header align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 15 struct page; struct address_space; struct pagevec { unsigned char nr; bool percpu_pvec_drained; struct page *pages[PAGEVEC_SIZE]; }; void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); unsigned pagevec_lookup_entries(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_entries, pgoff_t *indices); void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); static inline unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start) { return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; pvec->percpu_pvec_drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) { pvec->nr = 0; } static inline unsigned pagevec_count(struct pagevec *pvec) { return pvec->nr; } static inline unsigned pagevec_space(struct pagevec *pvec) { return PAGEVEC_SIZE - pvec->nr; } /* * Add a page to a pagevec. Returns the number of slots still available. */ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page) { pvec->pages[pvec->nr++] = page; return pagevec_space(pvec); } static inline void pagevec_release(struct pagevec *pvec) { if (pagevec_count(pvec)) __pagevec_release(pvec); } #endif /* _LINUX_PAGEVEC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 /* SPDX-License-Identifier: GPL-2.0-only */ /* * kernfs.h - pseudo filesystem decoupled from vfs locking */ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H #include <linux/kernel.h> #include <linux/err.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/lockdep.h> #include <linux/rbtree.h> #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/wait.h> struct file; struct dentry; struct iattr; struct seq_file; struct vm_area_struct; struct super_block; struct file_system_type; struct poll_table_struct; struct fs_context; struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, KERNFS_LINK = 0x0004, }; #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK #define KERNFS_MAX_USER_XATTRS 128 #define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, }; /* @flags for kernfs_create_root() */ enum kernfs_root_flag { /* * kernfs_nodes are created in the deactivated state and invisible. * They require explicit kernfs_activate() to become visible. This * can be used to make related nodes become visible atomically * after all nodes are created successfully. */ KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the * respective read or write access at all (none of S_IRUGO or * S_IWUGO) or the respective operation isn't implemented. The * following flag enables that behavior. */ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, /* * The filesystem supports exportfs operation, so userspace can use * fhandle to access nodes of the fs. */ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, /* * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, }; /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; /* children rbtree starts here and goes through kn->rb */ struct rb_root children; /* * The kernfs hierarchy this directory belongs to. This fits * better directly in kernfs_node but is here to save space. */ struct kernfs_root *root; }; struct kernfs_elem_symlink { struct kernfs_node *target_kn; }; struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * * As long as s_count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ struct kernfs_node { atomic_t count; atomic_t active; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* * Use kernfs_get_parent() and kernfs_name/path() instead of * accessing the following two fields directly. If the node is * never moved to a different parent, it is safe to access the * parent directly. */ struct kernfs_node *parent; const char *name; struct rb_node rb; const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; void *priv; /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; unsigned short flags; umode_t mode; struct kernfs_iattrs *iattr; }; /* * kernfs_syscall_ops may be specified on kernfs_create_root() to support * syscalls. These optional callbacks are invoked on the matching syscalls * and can perform any kernfs operations which don't necessarily have to be * the exact operation requested. An active reference is held for each * kernfs_node parameter. */ struct kernfs_syscall_ops { int (*show_options)(struct seq_file *sf, struct kernfs_root *root); int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, struct kernfs_root *root); }; struct kernfs_root { /* published fields */ struct kernfs_node *kn; unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ struct idr ino_idr; u32 last_id_lowbits; u32 id_highbits; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_mutex */ struct list_head supers; wait_queue_head_t deactivate_waitq; }; struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; size_t atomic_write_len; bool mmapped:1; bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { /* * Optional open/release methods. Both are called with * @of->seq_file populated. */ int (*open)(struct kernfs_open_file *of); void (*release)(struct kernfs_open_file *of); /* * Read is handled by either seq_file or raw_read(). * * If seq_show() is present, seq_file path is active. Other seq * operations are optional and if not implemented, the behavior is * equivalent to single_open(). @sf->private points to the * associated kernfs_open_file. * * read() is bounced through kernel buffer and a read larger than * PAGE_SIZE results in partial operation of PAGE_SIZE. */ int (*seq_show)(struct seq_file *sf, void *v); void *(*seq_start)(struct seq_file *sf, loff_t *ppos); void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); void (*seq_stop)(struct seq_file *sf, void *v); ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); /* * write() is bounced through kernel buffer. If atomic_write_len * is not set, a write larger than PAGE_SIZE results in partial * operations of PAGE_SIZE chunks. If atomic_write_len is set, * writes upto the specified size are executed atomically but * larger ones are rejected with -E2BIG. */ size_t atomic_write_len; /* * "prealloc" causes a buffer to be allocated at open for * all read/write requests. As ->seq_show uses seq_read() * which does its own allocation, it is incompatible with * ->prealloc. Provide ->read and ->write with ->prealloc. */ bool prealloc; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lockdep_key; #endif }; /* * The kernfs superblock creation/mount parameter context. */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ void *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ bool new_sb_created; /* Set to T if we allocated a new sb */ }; #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return kn->flags & KERNFS_TYPE_MASK; } static inline ino_t kernfs_id_ino(u64 id) { /* id is ino if ino_t is 64bit; otherwise, low 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return id; else return (u32)id; } static inline u32 kernfs_id_gen(u64 id) { /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return 1; else return id >> 32; } static inline ino_t kernfs_ino(struct kernfs_node *kn) { return kernfs_id_ino(kn->id); } static inline ino_t kernfs_gen(struct kernfs_node *kn) { return kernfs_id_gen(kn->id); } /** * kernfs_enable_ns - enable namespace under a directory * @kn: directory of interest, should be empty * * This is to be called right after @kn is created to enable namespace * under it. All children of @kn must have non-NULL namespace tags and * only the ones which match the super_block's tag will be visible. */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children)); kn->flags |= KERNFS_NS; } /** * kernfs_ns_enabled - test whether namespace is enabled * @kn: the node to test * * Test whether namespace filtering is enabled for the children of @ns. */ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return kn->flags & KERNFS_NS; } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); void kernfs_notify(struct kernfs_node *kn); int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size); int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); const void *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { return NULL; } static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { return NULL; } static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } static inline struct inode * kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { return NULL; } static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } static inline void kernfs_activate(struct kernfs_node *kn) { } static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size) { return -ENOSYS; } static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) { return -ENOSYS; } static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ /** * kernfs_path - build full path of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * If @kn is NULL result will be "(null)". * * Returns the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return kernfs_path_from_node(kn, NULL, buf, buflen); } static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { return kernfs_find_and_get_ns(kn, name, NULL); } static inline struct kernfs_node * kernfs_walk_and_get(struct kernfs_node *kn, const char *path) { return kernfs_walk_and_get_ns(kn, path, NULL); } static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { return kernfs_create_dir_ns(parent, name, mode, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, priv, NULL); } static inline struct kernfs_node * kernfs_create_file_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns) { struct lock_class_key *key = NULL; #ifdef CONFIG_DEBUG_LOCK_ALLOC key = (struct lock_class_key *)&ops->lockdep_key; #endif return __kernfs_create_file(parent, name, mode, uid, gid, size, ops, priv, ns, key); } static inline struct kernfs_node * kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv) { return kernfs_create_file_ns(parent, name, mode, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, size, ops, priv, NULL); } static inline int kernfs_remove_by_name(struct kernfs_node *parent, const char *name) { return kernfs_remove_by_name_ns(parent, name, NULL); } static inline int kernfs_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { return kernfs_rename_ns(kn, new_parent, new_name, NULL); } #endif /* __LINUX_KERNFS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_BITOPS_H #define _ASM_X86_BITOPS_H /* * Copyright 1992, Linus Torvalds. * * Note: inlines with more than a single statement should be marked * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif #include <linux/compiler.h> #include <asm/alternative.h> #include <asm/rmwcc.h> #include <asm/barrier.h> #if BITS_PER_LONG == 32 # define _BITOPS_LONG_SHIFT 5 #elif BITS_PER_LONG == 64 # define _BITOPS_LONG_SHIFT 6 #else # error "Unexpected BITS_PER_LONG" #endif #define BIT_64(n) (U64_C(1) << (n)) /* * These have to be done with inline assembly: that way the bit-setting * is guaranteed to be atomic. All bit operations return 0 if the bit * was cleared before the operation and != 0 if it was not. * * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ #define RLONG_ADDR(x) "m" (*(volatile long *) (x)) #define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) #define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline void arch___set_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline void arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); arch_clear_bit(nr, addr); } static __always_inline void arch___clear_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline bool arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } #define arch_clear_bit_unlock_is_negative_byte \ arch_clear_bit_unlock_is_negative_byte static __always_inline void arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { arch___clear_bit(nr, addr); } static __always_inline void arch___change_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline bool arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } static __always_inline bool arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return arch_test_and_set_bit(nr, addr); } static __always_inline bool arch___test_and_set_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } /* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. * KVM relies on this behaviour on x86 for modifying memory that is also * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ static __always_inline bool arch___test_and_clear_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch___test_and_change_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } #define arch_test_bit(nr, addr) \ (__builtin_constant_p((nr)) \ ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) /** * __ffs - find first set bit in word * @word: The word to search * * Undefined if no bit exists, so code should check against 0 first. */ static __always_inline unsigned long __ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; } /** * ffz - find first zero bit in word * @word: The word to search * * Undefined if no zero exists, so code should check against ~0UL first. */ static __always_inline unsigned long ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; } /* * __fls: find last set bit in word * @word: The word to search * * Undefined if no set bit exists, so code should check against 0 first. */ static __always_inline unsigned long __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) : "rm" (word)); return word; } #undef ADDR #ifdef __KERNEL__ /** * ffs - find first set bit in word * @x: the word to search * * This is defined the same way as the libc and compiler builtin ffs * routines, therefore differs in spirit from the other bitops. * * ffs(value) returns 0 if value is 0 or the position of the first * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ static __always_inline int ffs(int x) { int r; #ifdef CONFIG_X86_64 /* * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsfl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "r" (-1)); #else asm("bsfl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * fls - find last set bit in word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffs, but returns the position of the most significant set bit. * * fls(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ static __always_inline int fls(unsigned int x) { int r; #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsrl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "rm" (-1)); #else asm("bsrl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * fls64 - find last set bit in a 64-bit word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffsll, but returns the position of the most significant set bit. * * fls64(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 64. */ #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } #else #include <asm-generic/bitops/fls64.h> #endif #include <asm-generic/bitops/find.h> #include <asm-generic/bitops/sched.h> #include <asm/arch_hweight.h> #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/instrumented-atomic.h> #include <asm-generic/bitops/instrumented-non-atomic.h> #include <asm-generic/bitops/instrumented-lock.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> #endif /* __KERNEL__ */ #endif /* _ASM_X86_BITOPS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* SPDX-License-Identifier: GPL-2.0 */ /* * A security identifier table (sidtab) is a lookup table * of security context structures indexed by SID value. * * Original author: Stephen Smalley, <sds@tycho.nsa.gov> * Author: Ondrej Mosnacek, <omosnacek@gmail.com> * * Copyright (C) 2018 Red Hat, Inc. */ #ifndef _SS_SIDTAB_H_ #define _SS_SIDTAB_H_ #include <linux/spinlock_types.h> #include <linux/log2.h> #include <linux/hashtable.h> #include "context.h" struct sidtab_entry { u32 sid; u32 hash; struct context context; #if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0 struct sidtab_str_cache __rcu *cache; #endif struct hlist_node list; }; union sidtab_entry_inner { struct sidtab_node_inner *ptr_inner; struct sidtab_node_leaf *ptr_leaf; }; /* align node size to page boundary */ #define SIDTAB_NODE_ALLOC_SHIFT PAGE_SHIFT #define SIDTAB_NODE_ALLOC_SIZE PAGE_SIZE #define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size) - 1) + 1)) #define SIDTAB_INNER_SHIFT \ (SIDTAB_NODE_ALLOC_SHIFT - size_to_shift(sizeof(union sidtab_entry_inner))) #define SIDTAB_INNER_ENTRIES ((size_t)1 << SIDTAB_INNER_SHIFT) #define SIDTAB_LEAF_ENTRIES \ (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry)) #define SIDTAB_MAX_BITS 32 #define SIDTAB_MAX U32_MAX /* ensure enough tree levels for SIDTAB_MAX entries */ #define SIDTAB_MAX_LEVEL \ DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \ SIDTAB_INNER_SHIFT) struct sidtab_node_leaf { struct sidtab_entry entries[SIDTAB_LEAF_ENTRIES]; }; struct sidtab_node_inner { union sidtab_entry_inner entries[SIDTAB_INNER_ENTRIES]; }; struct sidtab_isid_entry { int set; struct sidtab_entry entry; }; struct sidtab_convert_params { int (*func)(struct context *oldc, struct context *newc, void *args); void *args; struct sidtab *target; }; #define SIDTAB_HASH_BITS CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS #define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS) struct sidtab { /* * lock-free read access only for as many items as a prior read of * 'count' */ union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1]; /* * access atomically via {READ|WRITE}_ONCE(); only increment under * spinlock */ u32 count; /* access only under spinlock */ struct sidtab_convert_params *convert; bool frozen; spinlock_t lock; #if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0 /* SID -> context string cache */ u32 cache_free_slots; struct list_head cache_lru_list; spinlock_t cache_lock; #endif /* index == SID - 1 (no entry for SECSID_NULL) */ struct sidtab_isid_entry isids[SECINITSID_NUM]; /* Hash table for fast reverse context-to-sid lookups. */ DECLARE_HASHTABLE(context_to_sid, SIDTAB_HASH_BITS); }; int sidtab_init(struct sidtab *s); int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context); struct sidtab_entry *sidtab_search_entry(struct sidtab *s, u32 sid); struct sidtab_entry *sidtab_search_entry_force(struct sidtab *s, u32 sid); static inline struct context *sidtab_search(struct sidtab *s, u32 sid) { struct sidtab_entry *entry = sidtab_search_entry(s, sid); return entry ? &entry->context : NULL; } static inline struct context *sidtab_search_force(struct sidtab *s, u32 sid) { struct sidtab_entry *entry = sidtab_search_entry_force(s, sid); return entry ? &entry->context : NULL; } int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params); void sidtab_cancel_convert(struct sidtab *s); void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags) __acquires(&s->lock); void sidtab_freeze_end(struct sidtab *s, unsigned long *flags) __releases(&s->lock); int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid); void sidtab_destroy(struct sidtab *s); int sidtab_hash_stats(struct sidtab *sidtab, char *page); #if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0 void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry, const char *str, u32 str_len); int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry, char **out, u32 *out_len); #else static inline void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry, const char *str, u32 str_len) { } static inline int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry, char **out, u32 *out_len) { return -ENOENT; } #endif /* CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0 */ #endif /* _SS_SIDTAB_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SWAP_H #define _LINUX_SWAP_H #include <linux/spinlock.h> #include <linux/linkage.h> #include <linux/mmzone.h> #include <linux/list.h> #include <linux/memcontrol.h> #include <linux/sched.h> #include <linux/node.h> #include <linux/fs.h> #include <linux/atomic.h> #include <linux/page-flags.h> #include <asm/page.h> struct notifier_block; struct bio; struct pagevec; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) #define SWAP_BATCH 64 static inline int current_is_kswapd(void) { return current->flags & PF_KSWAPD; } /* * MAX_SWAPFILES defines the maximum number of swaptypes: things which can * be swapped to. The swap type and the offset into that swap type are * encoded into pte's and into pgoff_t's in the swapcache. Using five bits * for the type means that the maximum number of swapcache pages is 27 bits * on 32-bit-pgoff_t architectures. And that assumes that the architecture packs * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 /* * Use some of the swap files numbers for other purposes. This * is a convenient way to hook into the VM to trigger special * actions on faults. */ /* * Unaddressable device memory support. See include/linux/hmm.h and * Documentation/vm/hmm.rst. Short description is we need struct pages for * device memory that is unaddressable (inaccessible) by CPU, so that we can * migrate part of a process memory to device memory. * * When a page is migrated from CPU to device, we set the CPU page table entry * to a special SWP_DEVICE_* entry. */ #ifdef CONFIG_DEVICE_PRIVATE #define SWP_DEVICE_NUM 2 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) #else #define SWP_DEVICE_NUM 0 #endif /* * NUMA node memory migration support */ #ifdef CONFIG_MIGRATION #define SWP_MIGRATION_NUM 2 #define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else #define SWP_MIGRATION_NUM 0 #endif /* * Handling of hardware poisoned pages with memory corruption. */ #ifdef CONFIG_MEMORY_FAILURE #define SWP_HWPOISON_NUM 1 #define SWP_HWPOISON MAX_SWAPFILES #else #define SWP_HWPOISON_NUM 0 #endif #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the * old reserved area - some extra information. Note that the first * kilobyte is reserved for boot loader or disk label stuff... * * Having the magic at the end of the PAGE_SIZE makes detecting swap * areas somewhat tricky on machines that support multiple page sizes. * For 2.5 we'll probably want to move the magic to just beyond the * bootbits... */ union swap_header { struct { char reserved[PAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ } magic; struct { char bootbits[1024]; /* Space for disklabel etc. */ __u32 version; __u32 last_page; __u32 nr_badpages; unsigned char sws_uuid[16]; unsigned char sws_volume[16]; __u32 padding[117]; __u32 badpages[1]; } info; }; /* * current->reclaim_state points to one of these when a task is running * memory reclaim */ struct reclaim_state { unsigned long reclaimed_slab; }; #ifdef __KERNEL__ struct address_space; struct sysinfo; struct writeback_control; struct zone; /* * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of * disk blocks. A list of swap extents maps the entire swapfile. (Where the * term `swapfile' refers to either a blockdevice or an IS_REG file. Apart * from setup, they're handled identically. * * We always assume that blocks are of size PAGE_SIZE. */ struct swap_extent { struct rb_node rb_node; pgoff_t start_page; pgoff_t nr_pages; sector_t start_block; }; /* * Max bad pages in the new format.. */ #define MAX_SWAP_BADPAGES \ ((offsetof(union swap_header, magic.magic) - \ offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ SWP_VALID = (1 << 13), /* swap is valid to be operated on? */ /* add others here before... */ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ #define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ /* Special value in first swap_map */ #define SWAP_MAP_MAX 0x3e /* Max count */ #define SWAP_MAP_BAD 0x3f /* Note page is bad */ #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */ /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ /* * We use this to track usage of a cluster. A cluster is a block of swap disk * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * * The data field stores next cluster if the cluster is free or cluster usage * counter otherwise. The flags field determines if a cluster is free. This is * protected by swap_info_struct.lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields * and swap_info_struct->swap_map * elements correspond to the swap * cluster */ unsigned int data:24; unsigned int flags:8; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ #define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from * its own cluster and swapout sequentially. The purpose is to optimize swapout * throughput. */ struct percpu_cluster { struct swap_cluster_info index; /* Current cluster index */ unsigned int next; /* Likely next allocation offset */ }; struct swap_cluster_list { struct swap_cluster_info head; struct swap_cluster_info tail; }; /* * The in-memory structure used to track swap areas. */ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct swap_cluster_list free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ #ifdef CONFIG_FRONTSWAP unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ atomic_t frontswap_pages; /* frontswap pages in-use counter */ #endif spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ spinlock_t cont_lock; /* * protect swap count continuation page * list. */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the * array is nr_node_ids, which is not * a fixed value so have to allocate * dynamically. * And it has to be an array so that * plist_for_each_* can work. */ }; #ifdef CONFIG_64BIT #define SWAP_RA_ORDER_CEILING 5 #else /* Avoid stack overflow, because we need to save part of page table */ #define SWAP_RA_ORDER_CEILING 3 #define SWAP_RA_PTE_CACHE_SIZE (1 << SWAP_RA_ORDER_CEILING) #endif struct vma_swap_readahead { unsigned short win; unsigned short offset; unsigned short nr_pte; #ifdef CONFIG_64BIT pte_t *ptes; #else pte_t ptes[SWAP_RA_PTE_CACHE_SIZE]; #endif }; /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); #define mapping_set_update(xas, mapping) do { \ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ xas_set_update(xas, workingset_update_node); \ } while (0) /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; extern unsigned long nr_free_buffer_pages(void); /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); extern unsigned long reclaim_pages(struct list_head *page_list); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; #else #define node_reclaim_mode 0 #endif extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP #include <linux/blk_types.h> /* for bio_end_io_t */ /* linux/mm/page_io.c */ extern int swap_readpage(struct page *page, bool do_poll); extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func); extern int swap_set_page_dirty(struct page *page); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ /* One swap address space for each 64M swap space */ #define SWAP_ADDRESS_SPACE_SHIFT 14 #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) extern struct address_space *swapper_spaces[]; #define swap_address_space(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); extern void *get_shadow_from_swap_cache(swp_entry_t entry); extern int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp, void **shadowp); extern void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow); extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool do_poll); extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages; } static inline long get_nr_swap_pages(void) { return atomic_long_read(&nr_swap_pages); } extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(struct page *page); extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); extern int __swap_count(swp_entry_t entry); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { rcu_read_unlock(); } #else /* CONFIG_SWAP */ static inline int swap_readpage(struct page *page, bool do_poll) { return 0; } static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return NULL; } #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL #define vm_swap_full() 0 #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file * so leave put_page and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); static inline void show_swap_cache_info(void) { } #define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) #define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { return 0; } static inline void swap_shmem_alloc(swp_entry_t swp) { } static inline int swap_duplicate(swp_entry_t swp) { return 0; } static inline void swap_free(swp_entry_t swp) { } static inline void put_swap_page(struct page *page, swp_entry_t swp) { } static inline struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } static inline int swap_writepage(struct page *p, struct writeback_control *wbc) { return 0; } static inline struct page *lookup_swap_cache(swp_entry_t swp, struct vm_area_struct *vma, unsigned long addr) { return NULL; } static inline struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) { return find_get_page(mapping, index); } static inline int add_to_swap(struct page *page) { return 0; } static inline void *get_shadow_from_swap_cache(swp_entry_t entry) { return NULL; } static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask, void **shadowp) { return -1; } static inline void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow) { } static inline void delete_from_swap_cache(struct page *page) { } static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end) { } static inline int page_swapcount(struct page *page) { return 0; } static inline int __swap_count(swp_entry_t entry) { return 0; } static inline int __swp_swapcount(swp_entry_t entry) { return 0; } static inline int swp_swapcount(swp_entry_t entry) { return 0; } #define reuse_swap_page(page, total_map_swapcount) \ (page_trans_huge_mapcount(page, total_map_swapcount) == 1) static inline int try_to_free_swap(struct page *page) { return 0; } static inline swp_entry_t get_swap_page(struct page *page) { swp_entry_t entry; entry.val = 0; return entry; } #endif /* CONFIG_SWAP */ #ifdef CONFIG_THP_SWAP extern int split_swap_cluster(swp_entry_t entry); #else static inline int split_swap_cluster(swp_entry_t entry) { return 0; } #endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* Cgroup2 doesn't have per-cgroup swappiness */ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return vm_swappiness; /* root ? */ if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) return vm_swappiness; return memcg->swappiness; } #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { return vm_swappiness; } #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); #else static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) { } #endif #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { } static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { return 0; } static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { } static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { return get_nr_swap_pages(); } static inline bool mem_cgroup_swap_full(struct page *page) { return vm_swap_full(); } #endif #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/circ_buf.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct device; struct gpio_desc; /* * This structure describes all the operations that can be done on the * physical hardware. See Documentation/driver-api/serial/driver.rst for details. */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); /* * Return a string describing the type of the port */ const char *(*type)(struct uart_port *); /* * Release IO and memory resources used by the port. * This includes iounmap if necessary. */ void (*release_port)(struct uart_port *); /* * Request IO and memory resources used by the port. * This includes iomapping the port if necessary. */ int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef unsigned int __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ unsigned char quirks; /* internal quirks */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ #define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ #define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ #define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ /* quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..UPF_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) (1 << 20)) #define UPF_AUTO_RTS ((__force upf_t) (1 << 21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) (1 << 22)) #define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) #define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) #define UPF_EXAR_EFR ((__force upf_t) (1 << 25)) #define UPF_BUG_THRE ((__force upf_t) (1 << 26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) #define UPF_IOREMAP ((__force upf_t) (1 << 31)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. */ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) int hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int timeout; /* character-based timeout */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* parent device */ unsigned long sysrq; /* sysrq timeout */ unsigned int sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; struct circ_buf xmit; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* Base timer interval for polling */ static inline int uart_poll_timeout(struct uart_port *port) { int timeout = port->timeout; return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. */ struct earlycon_device { struct console *con; struct uart_port port; char options[16]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id *__earlycon_table[]; extern const struct earlycon_id *__earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id) \ static const struct earlycon_id unique_id \ EARLYCON_USED_OR_UNUSED __initconst \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn }; \ static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ __section("__earlycon_table") \ * const __PASTE(__p, unique_id) = &unique_id #define OF_EARLYCON_DECLARE(_name, compat, fn) \ _OF_EARLYCON_DECLARE(_name, compat, fn, \ __UNIQUE_ID(__earlycon_##_name)) #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) extern int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, int)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); int uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_match_port(struct uart_port *port1, struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); #define uart_circ_empty(circ) ((circ)->head == (circ)->tail) #define uart_circ_clear(circ) ((circ)->head = (circ)->tail = 0) #define uart_circ_chars_pending(circ) \ (CIRC_CNT((circ)->head, (circ)->tail, UART_XMIT_SIZE)) #define uart_circ_chars_free(circ) \ (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. */ extern void uart_handle_dcd_change(struct uart_port *uport, unsigned int status); extern void uart_handle_cts_change(struct uart_port *uport, unsigned int status); extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) { int sysrq_ch; if (!port->has_sysrq) { spin_unlock_irqrestore(&port->lock, irqflags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; spin_unlock_irqrestore(&port->lock, irqflags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) { spin_unlock_irqrestore(&port->lock, irqflags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 /* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion * * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> * * Based on the original work by Paul McKenney <paulmck@vnet.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - * http://lse.sourceforge.net/locking/rcupdate.html * */ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/irqflags.h> #include <linux/preempt.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <asm/processor.h> #include <linux/cpumask.h> #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) #define ulong2long(a) (*(long *)(&(a))) #define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) #define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); void __rcu_read_unlock(void); /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TINY_RCU #define rcu_read_unlock_strict() do { } while (0) #else void rcu_read_unlock_strict(void); #endif static inline void __rcu_read_lock(void) { preempt_disable(); } static inline void __rcu_read_unlock(void) { preempt_enable(); rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) { return 0; } #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active __read_mostly; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); #else static inline void rcu_init_tasks_generic(void) { } #endif #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_NO_HZ_FULL void rcu_user_enter(void); void rcu_user_exit(void); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } #endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); void rcu_nocb_flush_deferred_wakeup(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static inline void rcu_init_nohz(void) { } static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /** * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers * @a: Code that RCU needs to pay attention to. * * RCU read-side critical sections are forbidden in the inner idle loop, * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU * will happily ignore any such read-side critical sections. However, * things like powertop need tracepoints in the inner idle loop. * * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) * will tell RCU that it needs to pay attention, invoke its argument * (in this example, calling the do_something_with_RCU() function), * and then tell RCU to go back to ignoring this CPU. It is permissible * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is * on the order of a million or so, even on 32-bit systems). It is * not legal to block within RCU_NONIDLE(), nor is it permissible to * transfer control either into or out of RCU_NONIDLE()'s statement. */ #define RCU_NONIDLE(a) \ do { \ rcu_irq_enter_irqson(); \ do { a; } while (0); \ rcu_irq_exit_irqson(); \ } while (0) /* * Note a quasi-voluntary context switch for RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU_GENERIC # ifdef CONFIG_TASKS_RCU # define rcu_tasks_classic_qs(t, preempt) \ do { \ if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu # define synchronize_rcu_tasks synchronize_rcu # endif # ifdef CONFIG_TASKS_TRACE_RCU # define rcu_tasks_trace_qs(t) \ do { \ if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \ smp_store_release(&(t)->trc_reader_checked, true); \ smp_mb(); /* Readers partitioned by store. */ \ } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ rcu_tasks_trace_qs((t)); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_qs(t, preempt) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ rcu_tasks_qs(current, false); \ cond_resched(); \ } while (0) /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ #if defined(CONFIG_TREE_RCU) #include <linux/rcutree.h> #elif defined(CONFIG_TINY_RCU) #include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif /* * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls * are needed for dynamic initialization and destruction of rcu_head * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for * dynamic initialization and destruction of statically allocated rcu_head * structures. However, rcu_head structures allocated dynamically in the * heap don't need any initialization. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head); void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ static inline void init_rcu_head(struct rcu_head *head) { } static inline void destroy_rcu_head(struct rcu_head *head) { } static inline void init_rcu_head_on_stack(struct rcu_head *head) { } static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); } extern struct lockdep_map rcu_lock_map; extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); int rcu_read_lock_sched_held(void); int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) { return 1; } static inline int rcu_read_lock_bh_held(void) { return 1; } static inline int rcu_read_lock_sched_held(void) { return !preemptible(); } static inline int rcu_read_lock_any_held(void) { return !preemptible(); } #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* * Helper functions for rcu_dereference_check(), rcu_dereference_protected() * and rcu_assign_pointer(). Some of these could be folded into their * callers, but they are left separate in order to ease introduction of * multiple pointers markings to match different RCU implementations * (e.g., __srcu), should this make sense in the future. */ #ifdef __CHECKER__ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_check(p, c, space) \ ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define rcu_dereference_raw(p) \ ({ \ /* Dependency order vs. p above. */ \ typeof(p) ________p1 = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from * reordering the code that initializes the structure after the pointer * assignment. More importantly, this call documents which pointers * will be dereferenced by RCU read-side code. * * In some special cases, you may use RCU_INIT_POINTER() instead * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due * to the fact that it does not constrain either the CPU or the compiler. * That said, using RCU_INIT_POINTER() when you should have used * rcu_assign_pointer() is a very bad thing that results in * impossible-to-diagnose memory corruption. So please be careful. * See the RCU_INIT_POINTER() comment header for details. * * Note that rcu_assign_pointer() evaluates each of its arguments only * once, appearances notwithstanding. One of the "extra" evaluations * is in typeof() and the other visible only to sparse (__CHECKER__), * neither of which actually execute the argument. As with most cpp * macros, this execute-arguments-only-once property is important, so * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) /** * rcu_replace_pointer() - replace an RCU pointer, returning its old value * @rcu_ptr: RCU pointer, whose old value is returned * @ptr: regular pointer * @c: the lockdep conditions under which the dereference will take place * * Perform a replacement, where @rcu_ptr is an RCU-annotated * pointer and @c is the lockdep argument that is passed to the * rcu_dereference_protected() call used to read that pointer. The old * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. */ #define rcu_replace_pointer(rcu_ptr, ptr, c) \ ({ \ typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ __tmp; \ }) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the * lockdep checks for being in an RCU read-side critical section. This is * useful when the value of this pointer is accessed, but the pointer is * not dereferenced, for example, when testing an RCU-protected pointer * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. * * It is also permissible to use rcu_access_pointer() when read-side * access to the pointer was removed at least one grace period ago, as * is the case in the context of the RCU callback that is freeing up * the data, or after a synchronize_rcu() returns. This can be useful * when tearing down multi-linked structures after a grace period * has elapsed. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the * dereference will take place are correct. Typically the conditions * indicate the various locking conditions that should be held at that * point. The check should return true if the conditions are satisfied. * An implicit check for being in an RCU read-side critical section * (rcu_read_lock()) is included. * * For example: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); * * Inserts memory barriers on architectures that require them * (currently only the Alpha), prevents the compiler from refetching * (and from merging fetches), and, more importantly, documents exactly * which pointers are protected by RCU and checks that the pointer is * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-bh counterpart to rcu_dereference_check(). */ #define rcu_dereference_bh_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-sched counterpart to rcu_dereference_check(). */ #define rcu_dereference_sched_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(). This is useful in cases where update-side locks * prevent the value of the pointer from changing. Please note that this * primitive does *not* prevent the compiler from repeating this reference * or combining it with other references, so it should not be used without * protection of appropriate locks. * * This function is only for update-side use. Using this function * when protected only by rcu_read_lock() will result in infrequent * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), (c), __rcu) /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * This is a simple wrapper around rcu_dereference_check(). */ #define rcu_dereference(p) rcu_dereference_check(p, 0) /** * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) /** * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off * * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows:: * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); * if (long_lived) { * if (!atomic_inc_not_zero(p->refcnt)) * long_lived = false; * else * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); */ #define rcu_pointer_handoff(p) (p) /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the * synchronize_rcu() is guaranteed to block until after all the other * CPUs exit their critical sections. Similarly, if call_rcu() is invoked * on one CPU while other CPUs are within RCU read-side critical * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU * callback is invoked. This is legal, because the RCU read-side critical * section that was running concurrently with the call_rcu() (and which * therefore might be referencing something that the corresponding RCU * callback would free up) has completed before the corresponding * RCU callback is invoked. * * RCU read-side critical sections may be nested. Any deferred actions * will be deferred until the outermost RCU read-side critical section * completes. * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, * but explicit blocking is illegal. Finally, in preemptible RCU * implementations in real-time (with -rt patchset) kernel builds, RCU * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not * a bug -- this property is what provides RCU's performance benefits. * Of course, writers must coordinate with each other. The normal * spinlock primitives work well for this, but any other technique may be * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ /** * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In most situations, rcu_read_unlock() is immune from deadlock. * However, in kernels built with CONFIG_RCU_BOOST, rcu_read_unlock() * is responsible for deboosting, which it does via rt_mutex_unlock(). * Unfortunately, this function acquires the scheduler's runqueue and * priority-inheritance spinlocks. This means that deadlock could result * if the caller of rcu_read_unlock() already holds one of these locks or * any lock that is ever acquired while holding them. * * That said, RCU readers are never priority boosted unless they were * preempted. Therefore, one way to avoid deadlock is to make sure * that preemption never happens within any RCU read-side critical * section whose outermost rcu_read_unlock() is called with one of * rt_mutex_unlock()'s locks held. Such preemption can be avoided in * a number of ways, for example, by invoking preempt_disable() before * critical section's outermost rcu_read_lock(). * * Given that the set of locks acquired by rt_mutex_unlock() might change * at any time, a somewhat more future-proofed approach is to make sure * that that preemption never happens within any RCU read-side critical * section whose outermost rcu_read_unlock() is called with irqs disabled. * This approach relies on the fact that rt_mutex_unlock() currently only * acquires irq-disabled locks. * * The second of these two approaches is best in most situations, * however, the first approach can also be useful, at least to those * developers willing to keep abreast of the set of locks acquired by * rt_mutex_unlock(). * * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but also disables softirqs. * Note that anything else that disables softirqs can also serve as * an RCU read-side critical section. * * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) { local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } /** * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ static inline void rcu_read_unlock_bh(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); } /** * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * * This is equivalent of rcu_read_lock(), but disables preemption. * Read-side critical sections can also be introduced by anything else * that disables preemption, including local_irq_disable() and friends. * * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_sched() from process context if the matching * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); } /** * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); preempt_enable_notrace(); } /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to * readers either at compile time or via rcu_assign_pointer() *and* * * a. You have not made *any* reader-visible changes to * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to * other state where exact synchronization is not required.) * * Failure to follow these rules governing use of RCU_INIT_POINTER() will * result in impossible-to-diagnose memory corruption. As in the structures * will look OK in crash dumps, but any concurrent RCU readers might * see pre-initialized values of the referenced data structure. So * please be very careful how you use RCU_INIT_POINTER()!!! * * If you are creating an RCU-protected linked structure that is accessed * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler. */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) /* * Does the specified offset indicate that the corresponding rcu_head * structure can be handled by kvfree_rcu()? */ #define __is_kvfree_rcu_offset(offset) ((offset) < 4096) /* * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. */ #define __kvfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offset)); \ kvfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * * The kfree_rcu() function handles this issue. Rather than encoding a * function address in the embedded rcu_head structure, kfree_rcu() instead * encodes the offset of the rcu_head structure within the base structure. * Because the functions are not allowed in the low-order 4096 bytes of * kernel virtual memory, offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in __kvfree_rcu(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * * Note that the allowable offset might decrease in the future, for example, * to allow something like kmem_cache_free_rcu(). * * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ #define kfree_rcu(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ if (___p) \ __kvfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ } while (0) /** * kvfree_rcu() - kvfree an object after a grace period. * * This macro consists of one or two arguments and it is * based on whether an object is head-less or not. If it * has a head then a semantic stays the same as it used * to be before: * * kvfree_rcu(ptr, rhf); * * where @ptr is a pointer to kvfree(), @rhf is the name * of the rcu_head structure within the type of @ptr. * * When it comes to head-less variant, only one argument * is passed and that is just a pointer which has to be * freed after a grace period. Therefore the semantic is * * kvfree_rcu(ptr); * * where @ptr is a pointer to kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() * annotation. Otherwise, please switch and embed the * rcu_head structure within the type of @ptr. */ #define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) #define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME #define kvfree_rcu_arg_2(ptr, rhf) kfree_rcu(ptr, rhf) #define kvfree_rcu_arg_1(ptr) \ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \ } while (0) /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ /* Has the specified rcu_head structure been handed to call_rcu()? */ /** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * * If you intend to invoke rcu_head_after_call_rcu() to test whether a * given rcu_head structure has already been passed to call_rcu(), then * you must also invoke this rcu_head_init() function on it just after * allocating that structure. Calls to this function must not race with * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation. */ static inline void rcu_head_init(struct rcu_head *rhp) { rhp->func = (rcu_callback_t)~0L; } /** * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including * the case where @rhp has already been invoked after a grace period. * Calls to this function must not race with callback invocation. One way * to avoid such races is to enclose the call to rcu_head_after_call_rcu() * in an RCU read-side critical section that includes a read-side fetch * of the pointer to the structure containing @rhp. */ static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { rcu_callback_t func = READ_ONCE(rhp->func); if (func == f) return true; WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } /* kernel/ksysfs.c definitions */ extern int rcu_expedited; extern int rcu_normal; #endif /* __LINUX_RCUPDATE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi.h - ACPI Interface * * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H #include <linux/errno.h> #include <linux/ioport.h> /* for struct resource */ #include <linux/irqdomain.h> #include <linux/resource_ext.h> #include <linux/device.h> #include <linux/property.h> #include <linux/uuid.h> #ifndef _LINUX #define _LINUX #endif #include <acpi/acpi.h> #ifdef CONFIG_ACPI #include <linux/list.h> #include <linux/mod_devicetable.h> #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> #include <acpi/acpi_numa.h> #include <acpi/acpi_io.h> #include <asm/acpi.h> static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; } #define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) #define ACPI_HANDLE_FWNODE(fwnode) \ acpi_device_handle(to_acpi_device_node(fwnode)) static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; fwnode = kzalloc(sizeof(struct fwnode_handle), GFP_KERNEL); if (!fwnode) return NULL; fwnode->ops = &acpi_static_fwnode_ops; return fwnode; } static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode) { if (WARN_ON(!is_acpi_static_node(fwnode))) return; kfree(fwnode); } /** * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with * the PCI-defined class-code information * * @_cls : the class, subclass, prog-if triple for this device * @_msk : the class mask for this device * * This macro is used to create a struct acpi_device_id that matches a * specific PCI class. The .id and .driver_data fields will be left * initialized with the default value. */ #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), static inline bool has_acpi_companion(struct device *dev) { return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) { return dev_name(&adev->dev); } struct device *acpi_get_first_physical_node(struct acpi_device *adev); enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, ACPI_IRQ_MODEL_IOAPIC, ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_COUNT }; extern enum acpi_irq_model_id acpi_irq_model; enum acpi_interrupt_id { ACPI_INTERRUPT_PMI = 1, ACPI_INTERRUPT_INIT, ACPI_INTERRUPT_CPEI, ACPI_INTERRUPT_COUNT }; #define ACPI_SPACE_MEM 0 enum acpi_address_range_id { ACPI_ADDRESS_RANGE_MEMORY = 1, ACPI_ADDRESS_RANGE_RESERVED = 2, ACPI_ADDRESS_RANGE_ACPI = 3, ACPI_ADDRESS_RANGE_NVS = 4, ACPI_ADDRESS_RANGE_COUNT }; /* Table Handlers */ union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; }; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); /* Debugger support */ struct acpi_debugger_ops { int (*create_thread)(acpi_osd_exec_callback function, void *context); ssize_t (*write_log)(const char *msg); ssize_t (*read_cmd)(char *buffer, size_t length); int (*wait_command_ready)(bool single_step, char *buffer, size_t length); int (*notify_command_complete)(void); }; struct acpi_debugger { const struct acpi_debugger_ops *ops; struct module *owner; struct mutex lock; }; #ifdef CONFIG_ACPI_DEBUGGER int __init acpi_debugger_init(void); int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops); void acpi_unregister_debugger(const struct acpi_debugger_ops *ops); int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context); ssize_t acpi_debugger_write_log(const char *msg); ssize_t acpi_debugger_read_cmd(char *buffer, size_t buffer_length); int acpi_debugger_wait_command_ready(void); int acpi_debugger_notify_command_complete(void); #else static inline int acpi_debugger_init(void) { return -ENODEV; } static inline int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops) { return -ENODEV; } static inline void acpi_unregister_debugger(const struct acpi_debugger_ops *ops) { } static inline int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context) { return -ENODEV; } static inline int acpi_debugger_write_log(const char *msg) { return -ENODEV; } static inline int acpi_debugger_read_cmd(char *buffer, u32 buffer_length) { return -ENODEV; } static inline int acpi_debugger_wait_command_ready(void) { return -ENODEV; } static inline int acpi_debugger_notify_command_complete(void) { return -ENODEV; } #endif #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) struct acpi_subtable_proc { int id; acpi_tbl_entry_handler handler; int count; }; void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); void acpi_boot_table_prepare (void); void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); #if defined(CONFIG_X86) || defined(CONFIG_IA64) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } #endif void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) #endif static inline bool invalid_logical_cpuid(u32 cpuid) { return (int)cpuid < 0; } static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) { return phys_id == PHYS_CPUID_INVALID; } /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ struct acpi_processor_power; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE bool acpi_processor_claim_cst_control(void); int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info); #else static inline bool acpi_processor_claim_cst_control(void) { return false; } static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { return acpi_sci_irq != INVALID_ACPI_IRQ; } extern int sbf_port; extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *fwnode); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else static inline int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) { return -1; } #endif /* * This function undoes the effect of one call to acpi_register_gsi(). * If this matches the last registration, any IRQ resources for gsi * are freed. */ void acpi_unregister_gsi (u32 gsi); struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); #ifdef CONFIG_PCI void acpi_penalize_sci_irq(int irq, int trigger, int polarity); #else static inline void acpi_penalize_sci_irq(int irq, int trigger, int polarity) { } #endif void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, u8 *rdata, unsigned rdata_len); extern acpi_handle ec_get_handle(void); extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) typedef void (*wmi_notify_handler) (u32 value, void *context); extern acpi_status wmi_evaluate_method(const char *guid, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out); extern acpi_status wmi_query_block(const char *guid, u8 instance, struct acpi_buffer *out); extern acpi_status wmi_set_block(const char *guid, u8 instance, const struct acpi_buffer *in); extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); #endif /* CONFIG_ACPI_WMI */ #define ACPI_VIDEO_OUTPUT_SWITCHING 0x0001 #define ACPI_VIDEO_DEVICE_POSTING 0x0002 #define ACPI_VIDEO_ROM_AVAILABLE 0x0004 #define ACPI_VIDEO_BACKLIGHT 0x0008 #define ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR 0x0010 #define ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO 0x0020 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR 0x0040 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO 0x0080 #define ACPI_VIDEO_BACKLIGHT_DMI_VENDOR 0x0100 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); /** * pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * * This is similar to pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with * offline nodes. A node may be offline when SRAT memory entry does not exist, * or NUMA is disabled, ex. "numa=off" on x86. */ static inline int pxm_to_online_node(int pxm) { int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } #else static inline int pxm_to_online_node(int pxm) { return 0; } static inline int acpi_map_pxm_to_node(int pxm) { return 0; } static inline int acpi_get_node(acpi_handle handle) { return 0; } #endif extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares, void *arg) { return acpi_dev_filter_resource_type(ares, (unsigned long)arg); } struct acpi_device *acpi_resource_consumer(struct resource *res); int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, u32 level); int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif #ifdef CONFIG_PM_SLEEP void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ int acpi_register_wakeup_handler( int wake_irq, bool (*wakeup)(void *context), void *context); void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context); struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; struct acpi_buffer cap; /* list of DWORD capabilities */ struct acpi_buffer ret; /* free by caller if success */ }; acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ #define OSC_REQUEST_ERROR 0x00000002 /* return */ #define OSC_INVALID_UUID_ERROR 0x00000004 /* return */ #define OSC_INVALID_REVISION_ERROR 0x00000008 /* return */ #define OSC_CAPABILITIES_MASK_ERROR 0x00000010 /* return */ /* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */ #define OSC_SB_PAD_SUPPORT 0x00000001 #define OSC_SB_PPC_OST_SUPPORT 0x00000002 #define OSC_SB_PR3_SUPPORT 0x00000004 #define OSC_SB_HOTPLUG_OST_SUPPORT 0x00000008 #define OSC_SB_APEI_SUPPORT 0x00000010 #define OSC_SB_CPC_SUPPORT 0x00000020 #define OSC_SB_CPCV2_SUPPORT 0x00000040 #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; /* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */ #define OSC_PCI_EXT_CONFIG_SUPPORT 0x00000001 #define OSC_PCI_ASPM_SUPPORT 0x00000002 #define OSC_PCI_CLOCK_PM_SUPPORT 0x00000004 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 0x00000008 #define OSC_PCI_MSI_SUPPORT 0x00000010 #define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 #define OSC_PCI_SUPPORT_MASKS 0x0000019f /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 #define OSC_PCI_SHPC_NATIVE_HP_CONTROL 0x00000002 #define OSC_PCI_EXPRESS_PME_CONTROL 0x00000004 #define OSC_PCI_EXPRESS_AER_CONTROL 0x00000008 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 #define OSC_PCI_CONTROL_MASKS 0x000000bf #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 #define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 #define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A #define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B #define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C #define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D #define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); /* Enable _OST when all relevant hotplug operations are enabled */ #if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST #endif /* _OST Source Event Code (OSPM Action) */ #define ACPI_OST_EC_OSPM_SHUTDOWN 0x100 #define ACPI_OST_EC_OSPM_EJECT 0x103 #define ACPI_OST_EC_OSPM_INSERTION 0x200 /* _OST General Processing Status Code */ #define ACPI_OST_SC_SUCCESS 0x0 #define ACPI_OST_SC_NON_SPECIFIC_FAILURE 0x1 #define ACPI_OST_SC_UNRECOGNIZED_NOTIFY 0x2 /* _OST OS Shutdown Processing (0x100) Status Code */ #define ACPI_OST_SC_OS_SHUTDOWN_DENIED 0x80 #define ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS 0x81 #define ACPI_OST_SC_OS_SHUTDOWN_COMPLETED 0x82 #define ACPI_OST_SC_OS_SHUTDOWN_NOT_SUPPORTED 0x83 /* _OST Ejection Request (0x3, 0x103) Status Code */ #define ACPI_OST_SC_EJECT_NOT_SUPPORTED 0x80 #define ACPI_OST_SC_DEVICE_IN_USE 0x81 #define ACPI_OST_SC_DEVICE_BUSY 0x82 #define ACPI_OST_SC_EJECT_DEPENDENCY_BUSY 0x83 #define ACPI_OST_SC_EJECT_IN_PROGRESS 0x84 /* _OST Insertion Request (0x200) Status Code */ #define ACPI_OST_SC_INSERT_IN_PROGRESS 0x80 #define ACPI_OST_SC_DRIVER_LOAD_FAILURE 0x81 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 enum acpi_predicate { all_versions, less_than_or_equal, equal, greater_than_or_equal, }; /* Table must be terminted by a NULL entry */ struct acpi_platform_list { char oem_id[ACPI_OEM_ID_SIZE+1]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE+1]; u32 oem_revision; char *table; enum acpi_predicate pred; char *reason; u32 data; }; int acpi_match_platform_list(const struct acpi_platform_list *plat); extern void acpi_early_init(void); extern void acpi_subsystem_init(void); extern void arch_post_acpi_subsys_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev); const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); void acpi_walk_dep_device_list(acpi_handle handle); struct platform_device *acpi_create_platform_device(struct acpi_device *, struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { adev->flags.visited = true; } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { adev->flags.visited = false; } enum acpi_reconfig_event { ACPI_RECONFIG_DEVICE_ADD = 0, ACPI_RECONFIG_DEVICE_REMOVE, }; int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); #ifdef CONFIG_ACPI_GTDT int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER static inline void acpi_arch_set_root_pointer(u64 addr) { } #endif #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER static inline u64 acpi_arch_get_root_pointer(void) { return 0; } #endif #else /* !CONFIG_ACPI */ #define acpi_disabled 1 #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), #include <acpi/acpi_numa.h> struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) { return false; } static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) { return false; } struct acpi_device; static inline bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) { return false; } static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { return NULL; } static inline void acpi_dev_put(struct acpi_device *adev) {} static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { return false; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) { return NULL; } static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) { return false; } static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) { return NULL; } static inline bool acpi_data_node_match(struct fwnode_handle *fwnode, const char *name) { return false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return NULL; } static inline bool has_acpi_companion(struct device *dev) { return false; } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { } static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; } static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev) { return NULL; } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { return 0; } static inline int acpi_boot_init(void) { return 0; } static inline void acpi_boot_table_prepare(void) { } static inline void acpi_boot_table_init(void) { } static inline int acpi_mps_check(void) { return 0; } static inline int acpi_check_resource_conflict(struct resource *res) { return 0; } static inline int acpi_check_region(resource_size_t start, resource_size_t n, const char *name) { return 0; } struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) { return -ENODEV; } static inline int acpi_nvs_register(__u64 start, __u64 size) { return 0; } static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data) { return 0; } struct acpi_device_id; static inline const struct acpi_device_id *acpi_match_device( const struct acpi_device_id *ids, const struct device *dev) { return NULL; } static inline const void *acpi_device_get_match_data(const struct device *dev) { return NULL; } static inline bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { return false; } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { return NULL; } static inline int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; } static inline int acpi_device_modalias(struct device *dev, char *buf, int size) { return -ENODEV; } static inline struct platform_device * acpi_create_platform_device(struct acpi_device *adev, struct property_entry *properties) { return NULL; } static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; } static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) { return DEV_DMA_NOT_SUPPORTED; } static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size) { return -ENODEV; } static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return 0; } static inline int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { return 0; } #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { } static inline int acpi_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline struct acpi_device *acpi_resource_consumer(struct resource *res) { return NULL; } static inline int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), void *context) { return -ENXIO; } static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_ioapic_add(acpi_handle root); #else static inline int acpi_ioapic_add(acpi_handle root) { return 0; } #endif #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, u32 val_a, u32 val_b)); acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); #ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, size_t size) { } #endif /* CONFIG_X86 */ #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM) int acpi_dev_suspend(struct device *dev, bool wakeup); int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); #else static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { return 0; } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); #else /* !CONFIG_ACPI */ static inline __printf(3, 4) void acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} #endif /* !CONFIG_ACPI */ #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) __printf(3, 4) void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...); #endif /* * acpi_handle_<level>: Print message with ACPI prefix and object path * * These interfaces acquire the global namespace mutex to obtain an object * path. In interrupt context, it shows the object path as <n/a>. */ #define acpi_handle_emerg(handle, fmt, ...) \ acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__) #define acpi_handle_alert(handle, fmt, ...) \ acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_crit(handle, fmt, ...) \ acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_err(handle, fmt, ...) \ acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__) #define acpi_handle_warn(handle, fmt, ...) \ acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__) #define acpi_handle_notice(handle, fmt, ...) \ acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__) #define acpi_handle_info(handle, fmt, ...) \ acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__) #if defined(DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__) #else #if defined(CONFIG_DYNAMIC_DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ _dynamic_func_call(fmt, __acpi_handle_debug, \ handle, pr_fmt(fmt), ##__VA_ARGS__) #else #define acpi_handle_debug(handle, fmt, ...) \ ({ \ if (0) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \ 0; \ }) #endif #endif #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { return -ENXIO; } #endif static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return acpi_dev_gpio_irq_get_by(adev, NULL, index); } /* Device properties */ #ifdef CONFIG_ACPI int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args); static inline int acpi_node_get_property_reference( const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, name, index, NR_FWNODE_REFERENCE_ARGS, args); } static inline bool acpi_dev_has_props(const struct acpi_device *adev) { return !list_empty(&adev->data.properties); } struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, const union acpi_object *properties); int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val); int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); #define ACPI_TABLE_ID_LEN 5 /** * struct acpi_probe_entry - boot-time probing entry * @id: ACPI table name * @type: Optional subtable type to match * (if @id contains subtables) * @subtable_valid: Optional callback to check the validity of * the subtable * @probe_table: Callback to the driver being probed when table * match is successful * @probe_subtbl: Callback to the driver being probed when table and * subtable match (and optional callback is successful) * @driver_data: Sideband data provided back to the driver */ struct acpi_probe_entry { __u8 id[ACPI_TABLE_ID_LEN]; __u8 type; acpi_probe_entry_validate_subtbl subtable_valid; union { acpi_tbl_table_handler probe_table; acpi_tbl_entry_handler probe_subtbl; }; kernel_ulong_t driver_data; }; #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_table = fn, \ .driver_data = data, \ } #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_subtbl = fn, \ .driver_data = data, \ } #define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table #define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); #define acpi_probe_device_table(t) \ ({ \ extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ ACPI_PROBE_TABLE_END(t); \ __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ (&ACPI_PROBE_TABLE_END(t) - \ &ACPI_PROBE_TABLE(t))); \ }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj) { return -ENXIO; } static inline int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return -ENXIO; } static inline int acpi_dev_prop_read_single(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val) { return -ENXIO; } static inline int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { return -ENXIO; } static inline int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { return -ENXIO; } static inline struct fwnode_handle * acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return NULL; } static inline struct fwnode_handle * acpi_node_get_parent(const struct fwnode_handle *fwnode) { return NULL; } static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return ERR_PTR(-ENXIO); } static inline int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint) { return -ENXIO; } #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ (void *) subtable, \ (void *) valid, \ (void *) fn, \ (void *) data } #define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #ifdef CONFIG_ACPI_TABLE_UPGRADE void acpi_table_upgrade(void); #else static inline void acpi_table_upgrade(void) { } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG) extern bool acpi_has_watchdog(void); #else static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE extern bool qdf2400_e44_present; int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { return 0; } #endif #if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); #else static inline int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_LPIT int lpit_read_residency_count_address(u64 *address); #else static inline int lpit_read_residency_count_address(u64 *address) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) { return -EINVAL; } #endif #ifdef CONFIG_ACPI extern int acpi_platform_notify(struct device *dev, enum kobject_action action); #else static inline int acpi_platform_notify(struct device *dev, enum kobject_action action) { return 0; } #endif #endif /*_LINUX_ACPI_H*/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> struct resource; struct device; /** * struct vmem_altmap - pre-allocated storage for vmemmap_populate * @base_pfn: base of the entire dev_pagemap mapping * @reserve: pages mapped, but reserved for driver use (relative to @base) * @free: free pages set aside in the mapping for memmap storage * @align: pages reserved to meet allocation alignments * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { const unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; unsigned long alloc; }; /* * Specialize ZONE_DEVICE memory into multiple types each having differents * usage. * * MEMORY_DEVICE_PRIVATE: * Device memory that is not directly addressable by the CPU: CPU can neither * read nor write private memory. In this case, we do still have struct pages * backing the device memory. Doing so simplifies the implementation, but it is * important to remember that there are certain points at which the struct page * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.rst. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a * wakeup event whenever a page is unpinned and becomes idle. This * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. This is for example used by DAX devices * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions. */ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; struct dev_pagemap_ops { /* * Called once the page refcount reaches 1. (ZONE_DEVICE pages never * reach 0 refcount unless there is a refcount bug. This allows the * device driver to implement its own memory management.) */ void (*page_free)(struct page *page); /* * Transition the refcount in struct dev_pagemap to the dead state. */ void (*kill)(struct dev_pagemap *pgmap); /* * Wait for refcount in struct dev_pagemap to be idle and reap it. */ void (*cleanup)(struct dev_pagemap *pgmap); /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); }; #define PGMAP_ALTMAP_VALID (1 << 0) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed. * @nr_range: number of ranges to be mapped * @range: range to be mapped when nr_range == 1 * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; void *owner; int nr_range; union { struct range range; struct range ranges[0]; }; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) return &pgmap->altmap; return NULL; } #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without * ZONE_DEVICE support enabled, this requires callers to fall * back to plain devm_memremap() based on config */ WARN_ON_ONCE(1); return ERR_PTR(-ENXIO); } static inline void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) { } static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { return NULL; } static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) { return false; } static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { return 0; } static inline void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) { } /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) percpu_ref_put(pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 /* SPDX-License-Identifier: GPL-2.0-only */ /* * NSA Security-Enhanced Linux (SELinux) security module * * This file contains the SELinux security data structures for kernel objects. * * Author(s): Stephen Smalley, <sds@tycho.nsa.gov> * Chris Vance, <cvance@nai.com> * Wayne Salamon, <wsalamon@nai.com> * James Morris <jmorris@redhat.com> * * Copyright (C) 2001,2002 Networks Associates Technology, Inc. * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com> * Copyright (C) 2016 Mellanox Technologies */ #ifndef _SELINUX_OBJSEC_H_ #define _SELINUX_OBJSEC_H_ #include <linux/list.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/binfmts.h> #include <linux/in.h> #include <linux/spinlock.h> #include <linux/lsm_hooks.h> #include <linux/msg.h> #include <net/net_namespace.h> #include "flask.h" #include "avc.h" struct task_security_struct { u32 osid; /* SID prior to last execve */ u32 sid; /* current SID */ u32 exec_sid; /* exec SID */ u32 create_sid; /* fscreate SID */ u32 keycreate_sid; /* keycreate SID */ u32 sockcreate_sid; /* fscreate SID */ } __randomize_layout; enum label_initialized { LABEL_INVALID, /* invalid or not initialized */ LABEL_INITIALIZED, /* initialized */ LABEL_PENDING }; struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ struct list_head list; /* list of inode_security_struct */ u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ unsigned char initialized; /* initialization flag */ spinlock_t lock; }; struct file_security_struct { u32 sid; /* SID of open file description */ u32 fown_sid; /* SID of file owner (for SIGIO) */ u32 isid; /* SID of inode at the time of file open */ u32 pseqno; /* Policy seqno at the time of file open */ }; struct superblock_security_struct { struct super_block *sb; /* back pointer to sb object */ u32 sid; /* SID of file system superblock */ u32 def_sid; /* default SID for labeling */ u32 mntpoint_sid; /* SECURITY_FS_USE_MNTPOINT context for files */ unsigned short behavior; /* labeling behavior */ unsigned short flags; /* which mount options were specified */ struct mutex lock; struct list_head isec_head; spinlock_t isec_lock; }; struct msg_security_struct { u32 sid; /* SID of message */ }; struct ipc_security_struct { u16 sclass; /* security class of this object */ u32 sid; /* SID of IPC resource */ }; struct netif_security_struct { struct net *ns; /* network namespace */ int ifindex; /* device index */ u32 sid; /* SID for this interface */ }; struct netnode_security_struct { union { __be32 ipv4; /* IPv4 node address */ struct in6_addr ipv6; /* IPv6 node address */ } addr; u32 sid; /* SID for this node */ u16 family; /* address family */ }; struct netport_security_struct { u32 sid; /* SID for this node */ u16 port; /* port number */ u8 protocol; /* transport protocol */ }; struct sk_security_struct { #ifdef CONFIG_NETLABEL enum { /* NetLabel state */ NLBL_UNSET = 0, NLBL_REQUIRE, NLBL_LABELED, NLBL_REQSKB, NLBL_CONNLABELED, } nlbl_state; struct netlbl_lsm_secattr *nlbl_secattr; /* NetLabel sec attributes */ #endif u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ u16 sclass; /* sock security class */ enum { /* SCTP association state */ SCTP_ASSOC_UNSET = 0, SCTP_ASSOC_SET, } sctp_assoc_state; }; struct tun_security_struct { u32 sid; /* SID for the tun device sockets */ }; struct key_security_struct { u32 sid; /* SID of key */ }; struct ib_security_struct { u32 sid; /* SID of the queue pair or MAD agent */ }; struct pkey_security_struct { u64 subnet_prefix; /* Port subnet prefix */ u16 pkey; /* PKey number */ u32 sid; /* SID of pkey */ }; struct bpf_security_struct { u32 sid; /* SID of bpf obj creator */ }; struct perf_event_security_struct { u32 sid; /* SID of perf_event obj creator */ }; extern struct lsm_blob_sizes selinux_blob_sizes; static inline struct task_security_struct *selinux_cred(const struct cred *cred) { return cred->security + selinux_blob_sizes.lbs_cred; } static inline struct file_security_struct *selinux_file(const struct file *file) { return file->f_security + selinux_blob_sizes.lbs_file; } static inline struct inode_security_struct *selinux_inode( const struct inode *inode) { if (unlikely(!inode->i_security)) return NULL; return inode->i_security + selinux_blob_sizes.lbs_inode; } static inline struct msg_security_struct *selinux_msg_msg( const struct msg_msg *msg_msg) { return msg_msg->security + selinux_blob_sizes.lbs_msg_msg; } static inline struct ipc_security_struct *selinux_ipc( const struct kern_ipc_perm *ipc) { return ipc->security + selinux_blob_sizes.lbs_ipc; } /* * get the subjective security ID of the current task */ static inline u32 current_sid(void) { const struct task_security_struct *tsec = selinux_cred(current_cred()); return tsec->sid; } #endif /* _SELINUX_OBJSEC_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef INT_BLK_MQ_TAG_H #define INT_BLK_MQ_TAG_H /* * Tag address space map. */ struct blk_mq_tags { unsigned int nr_tags; unsigned int nr_reserved_tags; atomic_t active_queues; struct sbitmap_queue *bitmap_tags; struct sbitmap_queue *breserved_tags; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; struct request **rqs; struct request **static_rqs; struct list_head page_list; /* * used to clear request reference in rqs[] before freeing one * request pool */ spinlock_t lock; }; extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, unsigned int flags); extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags); extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags); extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); extern void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv); void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, struct blk_mq_hw_ctx *hctx) { if (!hctx) return &bt->ws[0]; return sbq_wait_ptr(bt, &hctx->wait_index); } enum { BLK_MQ_NO_TAG = -1U, BLK_MQ_TAG_MIN = 1, BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, }; extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return false; return __blk_mq_tag_busy(hctx); } static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return; __blk_mq_tag_idle(hctx); } static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { return tag < tags->nr_reserved_tags; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Hash algorithms. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_HASH_H #define _CRYPTO_INTERNAL_HASH_H #include <crypto/algapi.h> #include <crypto/hash.h> struct ahash_request; struct scatterlist; struct crypto_hash_walk { char *data; unsigned int offset; unsigned int alignmask; struct page *pg; unsigned int entrylen; unsigned int total; struct scatterlist *sg; unsigned int flags; }; struct ahash_instance { void (*free)(struct ahash_instance *inst); union { struct { char head[offsetof(struct ahash_alg, halg.base)]; struct crypto_instance base; } s; struct ahash_alg alg; }; }; struct shash_instance { void (*free)(struct shash_instance *inst); union { struct { char head[offsetof(struct shash_alg, base)]; struct crypto_instance base; } s; struct shash_alg alg; }; }; struct crypto_ahash_spawn { struct crypto_spawn base; }; struct crypto_shash_spawn { struct crypto_spawn base; }; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } int crypto_register_ahash(struct ahash_alg *alg); void crypto_unregister_ahash(struct ahash_alg *alg); int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); bool crypto_shash_alg_has_setkey(struct shash_alg *alg); static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_ahash(struct crypto_ahash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct hash_alg_common *crypto_spawn_ahash_alg( struct crypto_ahash_spawn *spawn) { return __crypto_hash_alg_common(spawn->base.alg); } int crypto_register_shash(struct shash_alg *alg); void crypto_unregister_shash(struct shash_alg *alg); int crypto_register_shashes(struct shash_alg *algs, int count); void crypto_unregister_shashes(struct shash_alg *algs, int count); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst); void shash_free_singlespawn_instance(struct shash_instance *inst); int crypto_grab_shash(struct crypto_shash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct shash_alg *crypto_spawn_shash_alg( struct crypto_shash_spawn *spawn) { return __crypto_shash_alg(spawn->base.alg); } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) { return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, halg); } static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { tfm->reqsize = reqsize; } static inline struct crypto_instance *ahash_crypto_instance( struct ahash_instance *inst) { return &inst->s.base; } static inline struct ahash_instance *ahash_instance( struct crypto_instance *inst) { return container_of(inst, struct ahash_instance, s.base); } static inline struct ahash_instance *ahash_alg_instance( struct crypto_ahash *ahash) { return ahash_instance(crypto_tfm_alg_instance(&ahash->base)); } static inline void *ahash_instance_ctx(struct ahash_instance *inst) { return crypto_instance_ctx(ahash_crypto_instance(inst)); } static inline void ahash_request_complete(struct ahash_request *req, int err) { req->base.complete(&req->base, err); } static inline u32 ahash_request_flags(struct ahash_request *req) { return req->base.flags; } static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { return crypto_enqueue_request(queue, &request->base); } static inline struct ahash_request *ahash_dequeue_request( struct crypto_queue *queue) { return ahash_request_cast(crypto_dequeue_request(queue)); } static inline void *crypto_shash_ctx(struct crypto_shash *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline struct crypto_instance *shash_crypto_instance( struct shash_instance *inst) { return &inst->s.base; } static inline struct shash_instance *shash_instance( struct crypto_instance *inst) { return container_of(inst, struct shash_instance, s.base); } static inline struct shash_instance *shash_alg_instance( struct crypto_shash *shash) { return shash_instance(crypto_tfm_alg_instance(&shash->base)); } static inline void *shash_instance_ctx(struct shash_instance *inst) { return crypto_instance_ctx(shash_crypto_instance(inst)); } static inline struct crypto_shash *crypto_spawn_shash( struct crypto_shash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) { return crypto_tfm_ctx_aligned(&tfm->base); } static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_shash, base); } #endif /* _CRYPTO_INTERNAL_HASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH * Copyright(c) 2020-2021 Intel Corporation */ #ifndef STA_INFO_H #define STA_INFO_H #include <linux/list.h> #include <linux/types.h> #include <linux/if_ether.h> #include <linux/workqueue.h> #include <linux/average.h> #include <linux/bitfield.h> #include <linux/etherdevice.h> #include <linux/rhashtable.h> #include <linux/u64_stats_sync.h> #include "key.h" /** * enum ieee80211_sta_info_flags - Stations flags * * These flags are used with &struct sta_info's @flags member, but * only indirectly with set_sta_flag() and friends. * * @WLAN_STA_AUTH: Station is authenticated. * @WLAN_STA_ASSOC: Station is associated. * @WLAN_STA_PS_STA: Station is in power-save mode * @WLAN_STA_AUTHORIZED: Station is authorized to send/receive traffic. * This bit is always checked so needs to be enabled for all stations * when virtual port control is not in use. * @WLAN_STA_SHORT_PREAMBLE: Station is capable of receiving short-preamble * frames. * @WLAN_STA_WDS: Station is one of our WDS peers. * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. * @WLAN_STA_MFP: Management frame protection is used with this STA. * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX) * during suspend/resume and station removal. * @WLAN_STA_PS_DRIVER: driver requires keeping this station in * power-save mode logically to flush frames that might still * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. * @WLAN_STA_TDLS_PEER: Station is a TDLS peer. * @WLAN_STA_TDLS_PEER_AUTH: This TDLS peer is authorized to send direct * packets. This means the link is enabled. * @WLAN_STA_TDLS_INITIATOR: We are the initiator of the TDLS link with this * station. * @WLAN_STA_TDLS_CHAN_SWITCH: This TDLS peer supports TDLS channel-switching * @WLAN_STA_TDLS_OFF_CHANNEL: The local STA is currently off-channel with this * TDLS peer * @WLAN_STA_TDLS_WIDER_BW: This TDLS peer supports working on a wider bw on * the BSS base channel. * @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was * keeping station in power-save mode, reply when the driver * unblocks the station. * @WLAN_STA_SP: Station is in a service period, so don't try to * reply to other uAPSD trigger frames or PS-Poll. * @WLAN_STA_4ADDR_EVENT: 4-addr event was already sent for this frame. * @WLAN_STA_INSERTED: This station is inserted into the hash table. * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX * until pending frames are delivered * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption, * so drop all packets without a key later. * * @NUM_WLAN_STA_FLAGS: number of defined flags */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, WLAN_STA_ASSOC, WLAN_STA_PS_STA, WLAN_STA_AUTHORIZED, WLAN_STA_SHORT_PREAMBLE, WLAN_STA_WDS, WLAN_STA_CLEAR_PS_FILT, WLAN_STA_MFP, WLAN_STA_BLOCK_BA, WLAN_STA_PS_DRIVER, WLAN_STA_PSPOLL, WLAN_STA_TDLS_PEER, WLAN_STA_TDLS_PEER_AUTH, WLAN_STA_TDLS_INITIATOR, WLAN_STA_TDLS_CHAN_SWITCH, WLAN_STA_TDLS_OFF_CHANNEL, WLAN_STA_TDLS_WIDER_BW, WLAN_STA_UAPSD, WLAN_STA_SP, WLAN_STA_4ADDR_EVENT, WLAN_STA_INSERTED, WLAN_STA_RATE_CONTROL, WLAN_STA_TOFFSET_KNOWN, WLAN_STA_MPSP_OWNER, WLAN_STA_MPSP_RECIPIENT, WLAN_STA_PS_DELIVER, WLAN_STA_USES_ENCRYPTION, NUM_WLAN_STA_FLAGS, }; #define ADDBA_RESP_INTERVAL HZ #define HT_AGG_MAX_RETRIES 15 #define HT_AGG_BURST_RETRIES 3 #define HT_AGG_RETRIES_PERIOD (15 * HZ) #define HT_AGG_STATE_DRV_READY 0 #define HT_AGG_STATE_RESPONSE_RECEIVED 1 #define HT_AGG_STATE_OPERATIONAL 2 #define HT_AGG_STATE_STOPPING 3 #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 #define HT_AGG_STATE_START_CB 6 #define HT_AGG_STATE_STOP_CB 7 #define HT_AGG_STATE_SENT_ADDBA 8 DECLARE_EWMA(avg_signal, 10, 8) enum ieee80211_agg_stop_reason { AGG_STOP_DECLINED, AGG_STOP_LOCAL_REQUEST, AGG_STOP_PEER_REQUEST, AGG_STOP_DESTROY_STA, }; /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */ #define AIRTIME_USE_TX BIT(0) #define AIRTIME_USE_RX BIT(1) struct airtime_info { u64 rx_airtime; u64 tx_airtime; s64 deficit; atomic_t aql_tx_pending; /* Estimated airtime for frames pending */ u32 aql_limit_low; u32 aql_limit_high; }; void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed); struct sta_info; /** * struct tid_ampdu_tx - TID aggregation information (Tx). * * @rcu_head: rcu head for freeing structure * @session_timer: check if we keep Tx-ing on the TID (by timeout value) * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect * @sta: station we are attached to * @dialog_token: dialog token for aggregation session * @timeout: session timeout value to be filled in ADDBA requests * @tid: TID number * @state: session state (see above) * @last_tx: jiffies of last tx activity * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping * @buf_size: reorder buffer size at receiver * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU withing A-MDPU * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. * * The TX path can access it under RCU lock-free if, and * only if, the state has the flag %HT_AGG_STATE_OPERATIONAL * set. Otherwise, the TX path must also acquire the spinlock * and re-check the state, see comments in the tx code * touching it. */ struct tid_ampdu_tx { struct rcu_head rcu_head; struct timer_list session_timer; struct timer_list addba_resp_timer; struct sk_buff_head pending; struct sta_info *sta; unsigned long state; unsigned long last_tx; u16 timeout; u8 dialog_token; u8 stop_initiator; bool tx_stop; u16 buf_size; u16 ssn; u16 failed_bar_ssn; bool bar_pending; bool amsdu; u8 tid; }; /** * struct tid_ampdu_rx - TID aggregation information (Rx). * * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an * A-MSDU with individually reported subframes. * @reorder_buf_filtered: bitmap indicating where there are filtered frames in * the reorder buffer that should be ignored when releasing frames * @reorder_time: jiffies when skb was added * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) * @reorder_timer: releases expired frames from the reorder buffer. * @sta: station we are attached to * @last_rx: jiffies of last rx activity * @head_seq_num: head sequence number in reordering buffer. * @stored_mpdu_num: number of MPDUs in reordering buffer * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @tid: TID number * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. * @removed: this session is removed (but might have been found due to RCU) * @started: this session has started (head ssn or higher was received) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. * * The @reorder_lock is used to protect the members of this * struct, except for @timeout, @buf_size and @dialog_token, * which are constant across the lifetime of the struct (the * dialog token being used only for debugging). */ struct tid_ampdu_rx { struct rcu_head rcu_head; spinlock_t reorder_lock; u64 reorder_buf_filtered; struct sk_buff_head *reorder_buf; unsigned long *reorder_time; struct sta_info *sta; struct timer_list session_timer; struct timer_list reorder_timer; unsigned long last_rx; u16 head_seq_num; u16 stored_mpdu_num; u16 ssn; u16 buf_size; u16 timeout; u8 tid; u8 auto_seq:1, removed:1, started:1; }; /** * struct sta_ampdu_mlme - STA aggregation information. * * @mtx: mutex to protect all TX data (except non-NULL assignments * to tid_tx[idx], which are protected by the sta spinlock) * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the * driver requested to close until the work for it runs * @tid_rx_manage_offl: bitmap indicating which BA sessions were requested * to be treated as started/stopped due to offloading * @agg_session_valid: bitmap indicating which TID has a rx BA session open on * @unexpected_agg: bitmap indicating which TID already sent a delBA due to * unexpected aggregation related frames outside a session * @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID * @tid_start_tx: sessions where start was requested * @last_addba_req_time: timestamp of the last addBA request. * @addba_req_num: number of times addBA request has been sent. * @dialog_token_allocator: dialog token enumerator for each new session; */ struct sta_ampdu_mlme { struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_manage_offl[BITS_TO_LONGS(2 * IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; struct tid_ampdu_tx *tid_start_tx[IEEE80211_NUM_TIDS]; unsigned long last_addba_req_time[IEEE80211_NUM_TIDS]; u8 addba_req_num[IEEE80211_NUM_TIDS]; u8 dialog_token_allocator; }; /* Value to indicate no TID reservation */ #define IEEE80211_TID_UNRESERVED 0xff #define IEEE80211_FAST_XMIT_MAX_IV 18 /** * struct ieee80211_fast_tx - TX fastpath information * @key: key to use for hw crypto * @hdr: the 802.11 header to put with the frame * @hdr_len: actual 802.11 header length * @sa_offs: offset of the SA * @da_offs: offset of the DA * @pn_offs: offset where to put PN for crypto (or 0 if not needed) * @band: band this will be transmitted on, for tx_info * @rcu_head: RCU head to free this struct * * This struct is small enough so that the common case (maximum crypto * header length of 8 like for CCMP/GCMP) fits into a single 64-byte * cache line. */ struct ieee80211_fast_tx { struct ieee80211_key *key; u8 hdr_len; u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; /** * struct ieee80211_fast_rx - RX fastpath information * @dev: netdevice for reporting the SKB * @vif_type: (P2P-less) interface type of the original sdata (sdata->vif.type) * @vif_addr: interface address * @rfc1042_hdr: copy of the RFC 1042 SNAP header (to have in cache) * @control_port_protocol: control port protocol copied from sdata * @expected_ds_bits: from/to DS bits expected * @icv_len: length of the MIC if present * @key: bool indicating encryption is expected (key is set) * @internal_forward: forward froms internally on AP/VLAN type interfaces * @uses_rss: copy of USES_RSS hw flag * @da_offs: offset of the DA in the header (for header conversion) * @sa_offs: offset of the SA in the header (for header conversion) * @rcu_head: RCU head for freeing this structure */ struct ieee80211_fast_rx { struct net_device *dev; enum nl80211_iftype vif_type; u8 vif_addr[ETH_ALEN] __aligned(2); u8 rfc1042_hdr[6] __aligned(2); __be16 control_port_protocol; __le16 expected_ds_bits; u8 icv_len; u8 key:1, internal_forward:1, uses_rss:1; u8 da_offs, sa_offs; struct rcu_head rcu_head; }; /* we use only values in the range 0-100, so pick a large precision */ DECLARE_EWMA(mesh_fail_avg, 20, 8) DECLARE_EWMA(mesh_tx_rate_avg, 8, 16) /** * struct mesh_sta - mesh STA information * @plink_lock: serialize access to plink fields * @llid: Local link ID * @plid: Peer link ID * @aid: local aid supplied by peer * @reason: Cancel reason on PLINK_HOLDING state * @plink_retries: Retries in establishment * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer * @plink_sta: peer link watch timer's sta_info * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift * @local_pm: local link-specific power save mode * @peer_pm: peer-specific power save mode towards local STA * @nonpeer_pm: STA power save mode towards non-peer neighbors * @processed_beacon: set to true after peer rates and capabilities are * processed * @connected_to_gate: true if mesh STA has a path to a mesh gate * @connected_to_as: true if mesh STA has a path to a authentication server * @fail_avg: moving percentage of failed MSDUs * @tx_rate_avg: moving average of tx bitrate */ struct mesh_sta { struct timer_list plink_timer; struct sta_info *plink_sta; s64 t_offset; s64 t_offset_setpoint; spinlock_t plink_lock; u16 llid; u16 plid; u16 aid; u16 reason; u8 plink_retries; bool processed_beacon; bool connected_to_gate; bool connected_to_as; enum nl80211_plink_state plink_state; u32 plink_timeout; /* mesh power save */ enum nl80211_mesh_power_mode local_pm; enum nl80211_mesh_power_mode peer_pm; enum nl80211_mesh_power_mode nonpeer_pm; /* moving percentage of failed MSDUs */ struct ewma_mesh_fail_avg fail_avg; /* moving average of tx bitrate */ struct ewma_mesh_tx_rate_avg tx_rate_avg; }; DECLARE_EWMA(signal, 10, 8) struct ieee80211_sta_rx_stats { unsigned long packets; unsigned long last_rx; unsigned long num_duplicates; unsigned long fragments; unsigned long dropped; int last_signal; u8 chains; s8 chain_signal_last[IEEE80211_MAX_CHAINS]; u32 last_rate; struct u64_stats_sync syncp; u64 bytes; u64 msdu[IEEE80211_NUM_TIDS + 1]; }; /* * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent * reception of at least one MSDU per access category per associated STA" * on APs, or "at least one MSDU per access category" on other interface types. * * This limit can be increased by changing this define, at the cost of slower * frame reassembly and increased memory use while fragments are pending. */ #define IEEE80211_FRAGMENT_MAX 4 struct ieee80211_fragment_entry { struct sk_buff_head skb_list; unsigned long first_frag_time; u16 seq; u16 extra_len; u16 last_frag; u8 rx_queue; u8 check_sequential_pn:1, /* needed for CCMP/GCMP */ is_protected:1; u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ unsigned int key_color; }; struct ieee80211_fragment_cache { struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX]; unsigned int next; }; /* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This * value will be scaled by the number of active stations when it is being * applied. */ #define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ /** * struct sta_info - STA information * * This structure collects information about a station that * mac80211 is communicating with. * * @list: global linked list entry * @free_list: list entry for keeping track of stations to free * @hash_node: hash node for rhashtable * @addr: station's MAC address - duplicated from public part to * let the hash table work with just a single cacheline * @local: pointer to the global information * @sdata: virtual interface this station belongs to * @ptk: peer keys negotiated with this station, if any * @ptk_idx: last installed peer key index * @gtk: group keys negotiated with this station, if any * @rate_ctrl: rate control algorithm reference * @rate_ctrl_lock: spinlock used to protect rate control data * (data inside the algorithm, so serializes calls there) * @rate_ctrl_priv: rate control private per-STA pointer * @lock: used for locking all fields that require locking, see comments * in the header file. * @drv_deliver_wk: used for delivering frames after driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly * @ps_lock: used for powersave (when mac80211 is the AP) related locking * @ps_tx_buf: buffers (per AC) of frames to transmit to this station * when it leaves power saving state or polls * @tx_filtered: buffers (per AC) of frames we already tried to * transmit but were filtered by hardware due to STA having * entered power saving state, these are also delivered to * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on * @assoc_at: clock boottime (in ns) of last association * @last_connected: time (in seconds) when a station got connected * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) * @tid_seq: per-TID sequence numbers for sending to this STA * @airtime: per-AC struct airtime_info describing airtime statistics for this * station * @airtime_weight: station weight for airtime fairness calculation purposes * @ampdu_mlme: A-MPDU state machine state * @mesh: mesh STA information * @debugfs_dir: debug filesystem directory dentry * @dead: set to true when sta is unlinked * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station * @cparams: CoDel parameters for this station. * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @tx_stats: TX statistics * @tx_stats.packets: # of packets transmitted * @tx_stats.bytes: # of bytes in all packets transmitted * @tx_stats.last_rate: last TX rate * @tx_stats.msdu: # of transmitted MSDUs per TID * @rx_stats: RX statistics * @rx_stats_avg: averaged RX statistics * @rx_stats_avg.signal: averaged signal * @rx_stats_avg.chain_signal: averaged per-chain signal * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs * this (by advertising the USES_RSS hw flag) * @status_stats: TX status statistics * @status_stats.filtered: # of filtered frames * @status_stats.retry_failed: # of frames that failed after retry * @status_stats.retry_count: # of retries attempted * @status_stats.lost_packets: # of lost packets * @status_stats.last_pkt_time: timestamp of last ACKed packet * @status_stats.msdu_retries: # of MSDU retries * @status_stats.msdu_failed: # of failed MSDUs * @status_stats.last_ack: last ack timestamp (jiffies) * @status_stats.last_ack_signal: last ACK signal * @status_stats.ack_signal_filled: last ACK signal validity * @status_stats.avg_ack_signal: average ACK signal * @frags: fragment cache */ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; struct rhlist_head hash_node; u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; u8 ptk_idx; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t rate_ctrl_lock; spinlock_t lock; struct ieee80211_fast_tx __rcu *fast_tx; struct ieee80211_fast_rx __rcu *fast_rx; struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; #endif struct work_struct drv_deliver_wk; u16 listen_interval; bool dead; bool removed; bool uploaded; enum ieee80211_sta_state sta_state; /* use the accessors defined below */ unsigned long _flags; /* STA powersave lock and frame queues */ spinlock_t ps_lock; struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; unsigned long txq_buffered_tids; u64 assoc_at; long last_connected; /* Updated from RX path only, no locking requirements */ struct ieee80211_sta_rx_stats rx_stats; struct { struct ewma_signal signal; struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; } rx_stats_avg; /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; /* Updated from TX status path only, no locking requirements */ struct { unsigned long filtered; unsigned long retry_failed, retry_count; unsigned int lost_packets; unsigned long last_pkt_time; u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; unsigned long last_ack; s8 last_ack_signal; bool ack_signal_filled; struct ewma_avg_signal avg_ack_signal; } status_stats; /* Updated from TX path only, no locking requirements */ struct { u64 packets[IEEE80211_NUM_ACS]; u64 bytes[IEEE80211_NUM_ACS]; struct ieee80211_tx_rate last_rate; struct rate_info last_rate_info; u64 msdu[IEEE80211_NUM_TIDS + 1]; } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; struct airtime_info airtime[IEEE80211_NUM_ACS]; u16 airtime_weight; /* * Aggregation information, locked with lock. */ struct sta_ampdu_mlme ampdu_mlme; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfs_dir; #endif enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; struct codel_params cparams; u8 reserved_tid; struct cfg80211_chan_def tdls_chandef; struct ieee80211_fragment_cache frags; /* keep last! */ struct ieee80211_sta sta; }; static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) { #ifdef CONFIG_MAC80211_MESH return sta->mesh->plink_state; #endif return NL80211_PLINK_LISTEN; } static inline void set_sta_flag(struct sta_info *sta, enum ieee80211_sta_info_flags flag) { WARN_ON(flag == WLAN_STA_AUTH || flag == WLAN_STA_ASSOC || flag == WLAN_STA_AUTHORIZED); set_bit(flag, &sta->_flags); } static inline void clear_sta_flag(struct sta_info *sta, enum ieee80211_sta_info_flags flag) { WARN_ON(flag == WLAN_STA_AUTH || flag == WLAN_STA_ASSOC || flag == WLAN_STA_AUTHORIZED); clear_bit(flag, &sta->_flags); } static inline int test_sta_flag(struct sta_info *sta, enum ieee80211_sta_info_flags flag) { return test_bit(flag, &sta->_flags); } static inline int test_and_clear_sta_flag(struct sta_info *sta, enum ieee80211_sta_info_flags flag) { WARN_ON(flag == WLAN_STA_AUTH || flag == WLAN_STA_ASSOC || flag == WLAN_STA_AUTHORIZED); return test_and_clear_bit(flag, &sta->_flags); } static inline int test_and_set_sta_flag(struct sta_info *sta, enum ieee80211_sta_info_flags flag) { WARN_ON(flag == WLAN_STA_AUTH || flag == WLAN_STA_ASSOC || flag == WLAN_STA_AUTHORIZED); return test_and_set_bit(flag, &sta->_flags); } int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state); static inline void sta_info_pre_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { int ret; WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); ret = sta_info_move_state(sta, new_state); WARN_ON_ONCE(ret); } void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); static inline struct tid_ampdu_tx * rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) { return rcu_dereference_protected(sta->ampdu_mlme.tid_tx[tid], lockdep_is_held(&sta->lock) || lockdep_is_held(&sta->ampdu_mlme.mtx)); } /* Maximum number of frames to buffer per power saving station per AC */ #define STA_MAX_TX_BUFFER 64 /* Minimum buffered frame expiry time. If STA uses listen interval that is * smaller than this value, the minimum value here is used instead. */ #define STA_TX_BUFFER_EXPIRE (10 * HZ) /* How often station data is cleaned up (e.g., expiration of buffered frames) */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr); /* * Get a STA info, must be under RCU read lock. */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); /* user must hold sta_mtx or be in RCU critical section */ struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr); #define for_each_sta_info(local, _addr, _sta, _tmp) \ rhl_for_each_entry_rcu(_sta, _tmp, \ sta_info_hash_lookup(local, _addr), hash_node) /* * Get STA info by index, BROKEN! */ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int idx); /* * Create a new STA info, caller owns returned structure * until sta_info_insert(). */ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp); void sta_info_free(struct ieee80211_local *local, struct sta_info *sta); /* * Insert STA info into hash table/list, returns zero or a * -EEXIST if (if the same MAC address is already present). * * Calling the non-rcu version makes the caller relinquish, * the _rcu version calls read_lock_rcu() and must be called * without it held. */ int sta_info_insert(struct sta_info *sta); int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU); int __must_check __sta_info_destroy(struct sta_info *sta); int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr); int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); void sta_info_recalc_tim(struct sta_info *sta); int sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); /** * __sta_info_flush - flush matching STA entries from the STA table * * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from * @vlans: if the given interface is an AP interface, also flush VLANs */ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); /** * sta_info_flush - flush matching STA entries from the STA table * * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from */ static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) { return __sta_info_flush(sdata, false); } void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats); u32 sta_get_expected_throughput(struct sta_info *sta); void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); unsigned long ieee80211_sta_last_active(struct sta_info *sta); enum sta_stats_type { STA_STATS_RATE_TYPE_INVALID = 0, STA_STATS_RATE_TYPE_LEGACY, STA_STATS_RATE_TYPE_HT, STA_STATS_RATE_TYPE_VHT, STA_STATS_RATE_TYPE_HE, STA_STATS_RATE_TYPE_S1G, }; #define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0) #define STA_STATS_FIELD_LEGACY_IDX GENMASK( 3, 0) #define STA_STATS_FIELD_LEGACY_BAND GENMASK( 7, 4) #define STA_STATS_FIELD_VHT_MCS GENMASK( 3, 0) #define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4) #define STA_STATS_FIELD_HE_MCS GENMASK( 3, 0) #define STA_STATS_FIELD_HE_NSS GENMASK( 7, 4) #define STA_STATS_FIELD_BW GENMASK(11, 8) #define STA_STATS_FIELD_SGI GENMASK(12, 12) #define STA_STATS_FIELD_TYPE GENMASK(15, 13) #define STA_STATS_FIELD_HE_RU GENMASK(18, 16) #define STA_STATS_FIELD_HE_GI GENMASK(20, 19) #define STA_STATS_FIELD_HE_DCM GENMASK(21, 21) #define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v) #define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v) #define STA_STATS_RATE_INVALID 0 static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s) { u32 r; r = STA_STATS_FIELD(BW, s->bw); if (s->enc_flags & RX_ENC_FLAG_SHORT_GI) r |= STA_STATS_FIELD(SGI, 1); switch (s->encoding) { case RX_ENC_VHT: r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_VHT); r |= STA_STATS_FIELD(VHT_NSS, s->nss); r |= STA_STATS_FIELD(VHT_MCS, s->rate_idx); break; case RX_ENC_HT: r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HT); r |= STA_STATS_FIELD(HT_MCS, s->rate_idx); break; case RX_ENC_LEGACY: r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_LEGACY); r |= STA_STATS_FIELD(LEGACY_BAND, s->band); r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx); break; case RX_ENC_HE: r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HE); r |= STA_STATS_FIELD(HE_NSS, s->nss); r |= STA_STATS_FIELD(HE_MCS, s->rate_idx); r |= STA_STATS_FIELD(HE_GI, s->he_gi); r |= STA_STATS_FIELD(HE_RU, s->he_ru); r |= STA_STATS_FIELD(HE_DCM, s->he_dcm); break; default: WARN_ON(1); return STA_STATS_RATE_INVALID; } return r; } #endif /* STA_INFO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_H #define _LINUX_FS_H #include <linux/linkage.h> #include <linux/wait_bit.h> #include <linux/kdev_t.h> #include <linux/dcache.h> #include <linux/path.h> #include <linux/stat.h> #include <linux/cache.h> #include <linux/list.h> #include <linux/list_lru.h> #include <linux/llist.h> #include <linux/radix-tree.h> #include <linux/xarray.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/pid.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/mm_types.h> #include <linux/capability.h> #include <linux/semaphore.h> #include <linux/fcntl.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> #include <linux/shrinker.h> #include <linux/migrate_mode.h> #include <linux/uidgid.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> #include <linux/delayed_call.h> #include <linux/uuid.h> #include <linux/errseq.h> #include <linux/ioprio.h> #include <linux/fs_types.h> #include <linux/build_bug.h> #include <linux/stddef.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> struct backing_dev_info; struct bdi_writeback; struct bio; struct export_operations; struct fiemap_extent_info; struct hd_geometry; struct iovec; struct kiocb; struct kobject; struct pipe_inode_info; struct poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; struct fs_context; struct fs_parameter_spec; extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; extern int sysctl_protected_fifos; extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 #define MAY_READ 0x00000004 #define MAY_APPEND 0x00000008 #define MAY_ACCESS 0x00000010 #define MAY_OPEN 0x00000020 #define MAY_CHDIR 0x00000040 /* called from RCU mode, don't block */ #define MAY_NOT_BLOCK 0x00000080 /* * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open() */ /* file is open for reading */ #define FMODE_READ ((__force fmode_t)0x1) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)0x2) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)0x4) /* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)0x8) /* file can be accessed using pwrite */ #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) /* File is opened with O_NDELAY (only set for block devices) */ #define FMODE_NDELAY ((__force fmode_t)0x40) /* File is opened with O_EXCL (only set for block devices) */ #define FMODE_EXCL ((__force fmode_t)0x80) /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ #define FMODE_64BITHASH ((__force fmode_t)0x400) /* * Don't update ctime and mtime. * * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ #define FMODE_NOCMTIME ((__force fmode_t)0x800) /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)0x1000) /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) /* Has read method(s) */ #define FMODE_CAN_READ ((__force fmode_t)0x20000) /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) /* File is stream-like */ #define FMODE_STREAM ((__force fmode_t)0x200000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)0x8000000) /* File represents mount that needs unmounting */ #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) /* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ #define ATTR_MODE (1 << 0) #define ATTR_UID (1 << 1) #define ATTR_GID (1 << 2) #define ATTR_SIZE (1 << 3) #define ATTR_ATIME (1 << 4) #define ATTR_MTIME (1 << 5) #define ATTR_CTIME (1 << 6) #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) /* * Whiteout is represented by a char device. The following constants define the * mode and device number to use. */ #define WHITEOUT_MODE 0 #define WHITEOUT_DEV 0 /* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares * about. Basically, these are the attributes that the VFS layer can * request to change from the FS layer. * * Derek Atkins <warlord@MIT.EDU> 94-10-20 */ struct iattr { unsigned int ia_valid; umode_t ia_mode; kuid_t ia_uid; kgid_t ia_gid; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; /* * Not an attribute, but an auxiliary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). */ struct file *ia_file; }; /* * Includes for diskquotas. */ #include <linux/quota.h> /* * Maximum number of layers of fs stack. Needs to be limited to * prevent kernel stack overflow */ #define FILESYSTEM_MAX_STACK_DEPTH 2 /** * enum positive_aop_returns - aop return codes with specific semantics * * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has *