1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #include <linux/sched.h> #include <linux/random.h> #include <linux/sbitmap.h> #include <linux/seq_file.h> /* * See if we have deferred clears that we can batch move */ static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) { unsigned long mask, val; bool ret = false; unsigned long flags; spin_lock_irqsave(&sb->map[index].swap_lock, flags); if (!sb->map[index].cleared) goto out_unlock; /* * First get a stable cleared mask, setting the old mask to 0. */ mask = xchg(&sb->map[index].cleared, 0); /* * Now clear the masked bits in our free word */ do { val = sb->map[index].word; } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); ret = true; out_unlock: spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); return ret; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node) { unsigned int bits_per_word; unsigned int i; if (shift < 0) { shift = ilog2(BITS_PER_LONG); /* * If the bitmap is small, shrink the number of bits per word so * we spread over a few cachelines, at least. If less than 4 * bits, just forget about it, it's not going to work optimally * anyway. */ if (depth >= 4) { while ((4U << shift) > depth) shift--; } } bits_per_word = 1U << shift; if (bits_per_word > BITS_PER_LONG) return -EINVAL; sb->shift = shift; sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); if (depth == 0) { sb->map = NULL; return 0; } sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); if (!sb->map) return -ENOMEM; for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; spin_lock_init(&sb->map[i].swap_lock); } return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); void sbitmap_resize(struct sbitmap *sb, unsigned int depth) { unsigned int bits_per_word = 1U << sb->shift; unsigned int i; for (i = 0; i < sb->map_nr; i++) sbitmap_deferred_clear(sb, i); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; } } EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { unsigned int orig_hint = hint; int nr; while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { /* * We started with an offset, and we didn't reset the * offset to 0 in a failure case, so start from 0 to * exhaust the map. */ if (orig_hint && hint && wrap) { hint = orig_hint = 0; continue; } return -1; } if (!test_and_set_bit_lock(nr, word)) break; hint = nr + 1; if (hint >= depth - 1) hint = 0; } return nr; } static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, unsigned int alloc_hint, bool round_robin) { int nr; do { nr = __sbitmap_get_word(&sb->map[index].word, sb->map[index].depth, alloc_hint, !round_robin); if (nr != -1) break; if (!sbitmap_deferred_clear(sb, index)) break; } while (1); return nr; } int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) { unsigned int i, index; int nr = -1; index = SB_NR_TO_INDEX(sb, alloc_hint); /* * Unless we're doing round robin tag allocation, just use the * alloc_hint to find the right word index. No point in looping * twice in find_next_zero_bit() for that case. */ if (round_robin) alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); else alloc_hint = 0; for (i = 0; i < sb->map_nr; i++) { nr = sbitmap_find_bit_in_index(sb, index, alloc_hint, round_robin); if (nr != -1) { nr += index << sb->shift; break; } /* Jump to next index. */ alloc_hint = 0; if (++index >= sb->map_nr) index = 0; } return nr; } EXPORT_SYMBOL_GPL(sbitmap_get); int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { unsigned int i, index; int nr = -1; index = SB_NR_TO_INDEX(sb, alloc_hint); for (i = 0; i < sb->map_nr; i++) { again: nr = __sbitmap_get_word(&sb->map[index].word, min(sb->map[index].depth, shallow_depth), SB_NR_TO_BIT(sb, alloc_hint), true); if (nr != -1) { nr += index << sb->shift; break; } if (sbitmap_deferred_clear(sb, index)) goto again; /* Jump to next index. */ index++; alloc_hint = index << sb->shift; if (index >= sb->map_nr) { index = 0; alloc_hint = 0; } } return nr; } EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { unsigned int i; for (i = 0; i < sb->map_nr; i++) { if (sb->map[i].word & ~sb->map[i].cleared) return true; } return false; } EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) { unsigned int i, weight = 0; for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; if (set) weight += bitmap_weight(&word->word, word->depth); else weight += bitmap_weight(&word->cleared, word->depth); } return weight; } static unsigned int sbitmap_weight(const struct sbitmap *sb) { return __sbitmap_weight(sb, true); } static unsigned int sbitmap_cleared(const struct sbitmap *sb) { return __sbitmap_weight(sb, false); } void sbitmap_show(struct sbitmap *sb, struct seq_file *m) { seq_printf(m, "depth=%u\n", sb->depth); seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); seq_printf(m, "map_nr=%u\n", sb->map_nr); } EXPORT_SYMBOL_GPL(sbitmap_show); static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) { if ((offset & 0xf) == 0) { if (offset != 0) seq_putc(m, '\n'); seq_printf(m, "%08x:", offset); } if ((offset & 0x1) == 0) seq_putc(m, ' '); seq_printf(m, "%02x", byte); } void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) { u8 byte = 0; unsigned int byte_bits = 0; unsigned int offset = 0; int i; for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); unsigned int word_bits = READ_ONCE(sb->map[i].depth); word &= ~cleared; while (word_bits > 0) { unsigned int bits = min(8 - byte_bits, word_bits); byte |= (word & (BIT(bits) - 1)) << byte_bits; byte_bits += bits; if (byte_bits == 8) { emit_byte(m, offset, byte); byte = 0; byte_bits = 0; offset++; } word >>= bits; word_bits -= bits; } } if (byte_bits) { emit_byte(m, offset, byte); offset++; } if (offset) seq_putc(m, '\n'); } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; unsigned int shallow_depth; /* * For each batch, we wake up one queue. We need to make sure that our * batch size is small enough that the full depth of the bitmap, * potentially limited by a shallow depth, is enough to wake up all of * the queues. * * Each full word of the bitmap has bits_per_word bits, and there might * be a partial word. There are depth / bits_per_word full words and * depth % bits_per_word bits left over. In bitwise arithmetic: * * bits_per_word = 1 << shift * depth / bits_per_word = depth >> shift * depth % bits_per_word = depth & ((1 << shift) - 1) * * Each word can be limited to sbq->min_shallow_depth bits. */ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); depth = ((depth >> sbq->sb.shift) * shallow_depth + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH); return wake_batch; } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node) { int ret; int i; ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); if (ret) return ret; sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); if (!sbq->alloc_hint) { sbitmap_free(&sbq->sb); return -ENOMEM; } if (depth && !round_robin) { for_each_possible_cpu(i) *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; } sbq->min_shallow_depth = UINT_MAX; sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); return -ENOMEM; } for (i = 0; i < SBQ_WAIT_QUEUES; i++) { init_waitqueue_head(&sbq->ws[i].wait); atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); } sbq->round_robin = round_robin; return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { WRITE_ONCE(sbq->wake_batch, wake_batch); /* * Pairs with the memory barrier in sbitmap_queue_wake_up() * to ensure that the batch size is updated before the wait * counts. */ smp_mb(); for (i = 0; i < SBQ_WAIT_QUEUES; i++) atomic_set(&sbq->ws[i].wait_cnt, 1); } } void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); int __sbitmap_queue_get(struct sbitmap_queue *sbq) { unsigned int hint, depth; int nr; hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { hint = depth ? prandom_u32() % depth : 0; this_cpu_write(*sbq->alloc_hint, hint); } nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); if (nr == -1) { /* If the map is full, a hint won't do us much good. */ this_cpu_write(*sbq->alloc_hint, 0); } else if (nr == hint || unlikely(sbq->round_robin)) { /* Only update the hint if we used it. */ hint = nr + 1; if (hint >= depth - 1) hint = 0; this_cpu_write(*sbq->alloc_hint, hint); } return nr; } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth) { unsigned int hint, depth; int nr; WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { hint = depth ? prandom_u32() % depth : 0; this_cpu_write(*sbq->alloc_hint, hint); } nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); if (nr == -1) { /* If the map is full, a hint won't do us much good. */ this_cpu_write(*sbq->alloc_hint, 0); } else if (nr == hint || unlikely(sbq->round_robin)) { /* Only update the hint if we used it. */ hint = nr + 1; if (hint >= depth - 1) hint = 0; this_cpu_write(*sbq->alloc_hint, hint); } return nr; } EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) { sbq->min_shallow_depth = min_shallow_depth; sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; if (!atomic_read(&sbq->ws_active)) return NULL; wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; if (waitqueue_active(&ws->wait)) { if (wake_index != atomic_read(&sbq->wake_index)) atomic_set(&sbq->wake_index, wake_index); return ws; } wake_index = sbq_index_inc(wake_index); } return NULL; } static bool __sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; unsigned int wake_batch; int wait_cnt; ws = sbq_wake_ptr(sbq); if (!ws) return false; wait_cnt = atomic_dec_return(&ws->wait_cnt); if (wait_cnt <= 0) { int ret; wake_batch = READ_ONCE(sbq->wake_batch); /* * Pairs with the memory barrier in sbitmap_queue_resize() to * ensure that we see the batch size update before the wait * count is reset. */ smp_mb__before_atomic(); /* * For concurrent callers of this, the one that failed the * atomic_cmpxhcg() race should call this function again * to wakeup a new batch on a different 'ws'. */ ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); if (ret == wait_cnt) { sbq_index_atomic_inc(&sbq->wake_index); wake_up_nr(&ws->wait, wake_batch); return false; } return true; } return false; } void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) { while (__sbq_wake_up(sbq)) ; } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { /* * Once the clear bit is set, the bit may be allocated out. * * Orders READ/WRITE on the asssociated instance(such as request * of blk_mq) by this bit for avoiding race with re-allocation, * and its pair is the memory barrier implied in __sbitmap_get_word. * * One invariant is that the clear bit has to be zero when the bit * is in use. */ smp_mb__before_atomic(); sbitmap_deferred_clear_bit(&sbq->sb, nr); /* * Pairs with the memory barrier in set_current_state() to ensure the * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); sbitmap_queue_wake_up(sbq); if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) { int i, wake_index; /* * Pairs with the memory barrier in set_current_state() like in * sbitmap_queue_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; if (waitqueue_active(&ws->wait)) wake_up(&ws->wait); wake_index = sbq_index_inc(wake_index); } } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) { bool first; int i; sbitmap_show(&sbq->sb, m); seq_puts(m, "alloc_hint={"); first = true; for_each_possible_cpu(i) { if (!first) seq_puts(m, ", "); first = false; seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); } seq_puts(m, "}\n"); seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[i]; seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", atomic_read(&ws->wait_cnt), waitqueue_active(&ws->wait) ? "active" : "inactive"); } seq_puts(m, "}\n"); seq_printf(m, "round_robin=%d\n", sbq->round_robin); seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait) { if (!sbq_wait->sbq) { sbq_wait->sbq = sbq; atomic_inc(&sbq->ws_active); add_wait_queue(&ws->wait, &sbq_wait->wait); } } EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) { list_del_init(&sbq_wait->wait.entry); if (sbq_wait->sbq) { atomic_dec(&sbq_wait->sbq->ws_active); sbq_wait->sbq = NULL; } } EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state) { if (!sbq_wait->sbq) { atomic_inc(&sbq->ws_active); sbq_wait->sbq = sbq; } prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); } EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait) { finish_wait(&ws->wait, &sbq_wait->wait); if (sbq_wait->sbq) { atomic_dec(&sbq->ws_active); sbq_wait->sbq = NULL; } } EXPORT_SYMBOL_GPL(sbitmap_finish_wait);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_MROUTE_H #define __LINUX_MROUTE_H #include <linux/in.h> #include <linux/pim.h> #include <net/fib_rules.h> #include <net/fib_notifier.h> #include <uapi/linux/mroute.h> #include <linux/mroute_base.h> #include <linux/sockptr.h> #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { return opt >= MRT_BASE && opt <= MRT_MAX; } int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, sockptr_t optval, unsigned int optlen) { return -ENOPROTOOPT; } static inline int ip_mroute_getsockopt(struct sock *sock, int optname, char __user *optval, int __user *optlen) { return -ENOPROTOOPT; } static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { return -ENOIOCTLCMD; } static inline int ip_mr_init(void) { return 0; } static inline int ip_mroute_opt(int opt) { return 0; } static inline bool ipmr_rule_default(const struct fib_rule *rule) { return true; } #endif #define VIFF_STATIC 0x8000 struct mfc_cache_cmp_arg { __be32 mfc_mcastgrp; __be32 mfc_origin; }; /** * struct mfc_cache - multicast routing entries * @_c: Common multicast routing information; has to be first [for casting] * @mfc_mcastgrp: destination multicast group address * @mfc_origin: source address * @cmparg: used for rhashtable comparisons */ struct mfc_cache { struct mr_mfc _c; union { struct { __be32 mfc_mcastgrp; __be32 mfc_origin; }; struct mfc_cache_cmp_arg cmparg; }; }; struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); #endif
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 // SPDX-License-Identifier: GPL-2.0-or-later /* bit search implementation * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Copyright (C) 2008 IBM Corporation * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> * (Inspired by David Howell's find_next_bit implementation) * * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease * size and improve performance, 2015. */ #include <linux/bitops.h> #include <linux/bitmap.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/minmax.h> #if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \ !defined(find_next_and_bit) /* * This is a common helper function for find_next_bit, find_next_zero_bit, and * find_next_and_bit. The differences are: * - The "invert" argument, which is XORed with each fetched word before * searching it for one bits. * - The optional "addr2", which is anded with "addr1" if present. */ static unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le) { unsigned long tmp, mask; if (unlikely(start >= nbits)) return nbits; tmp = addr1[start / BITS_PER_LONG]; if (addr2) tmp &= addr2[start / BITS_PER_LONG]; tmp ^= invert; /* Handle 1st word. */ mask = BITMAP_FIRST_WORD_MASK(start); if (le) mask = swab(mask); tmp &= mask; start = round_down(start, BITS_PER_LONG); while (!tmp) { start += BITS_PER_LONG; if (start >= nbits) return nbits; tmp = addr1[start / BITS_PER_LONG]; if (addr2) tmp &= addr2[start / BITS_PER_LONG]; tmp ^= invert; } if (le) tmp = swab(tmp); return min(start + __ffs(tmp), nbits); } #endif #ifndef find_next_bit /* * Find the next set bit in a memory region. */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { return _find_next_bit(addr, NULL, size, offset, 0UL, 0); } EXPORT_SYMBOL(find_next_bit); #endif #ifndef find_next_zero_bit unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); } EXPORT_SYMBOL(find_next_zero_bit); #endif #if !defined(find_next_and_bit) unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); } EXPORT_SYMBOL(find_next_and_bit); #endif #ifndef find_first_bit /* * Find the first set bit in a memory region. */ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { unsigned long idx; for (idx = 0; idx * BITS_PER_LONG < size; idx++) { if (addr[idx]) return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); } return size; } EXPORT_SYMBOL(find_first_bit); #endif #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. */ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { unsigned long idx; for (idx = 0; idx * BITS_PER_LONG < size; idx++) { if (addr[idx] != ~0UL) return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); } return size; } EXPORT_SYMBOL(find_first_zero_bit); #endif #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (size) { unsigned long val = BITMAP_LAST_WORD_MASK(size); unsigned long idx = (size-1) / BITS_PER_LONG; do { val &= addr[idx]; if (val) return idx * BITS_PER_LONG + __fls(val); val = ~0ul; } while (idx--); } return size; } EXPORT_SYMBOL(find_last_bit); #endif #ifdef __BIG_ENDIAN #ifndef find_next_zero_bit_le unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif #ifndef find_next_bit_le unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return _find_next_bit(addr, NULL, size, offset, 0UL, 1); } EXPORT_SYMBOL(find_next_bit_le); #endif #endif /* __BIG_ENDIAN */ unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr, unsigned long size, unsigned long offset) { offset = find_next_bit(addr, size, offset); if (offset == size) return size; offset = round_down(offset, 8); *clump = bitmap_get_value8(addr, offset); return offset; } EXPORT_SYMBOL(find_next_clump8);
1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ioctl.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/syscalls.h> #include <linux/mm.h> #include <linux/capability.h> #include <linux/compat.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/export.h> #include <linux/uaccess.h> #include <linux/writeback.h> #include <linux/buffer_head.h> #include <linux/falloc.h> #include <linux/sched/signal.h> #include <linux/fiemap.h> #include "internal.h" #include <asm/ioctls.h> /* So that the fiemap access checks can't overflow on 32 bit machines. */ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on * @cmd: ioctl command to execute * @arg: command-specific argument for ioctl * * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise * returns -ENOTTY. * * Returns 0 on success, -errno on error. */ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; if (!filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; out: return error; } EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int error, ur_block; sector_t block; if (!capable(CAP_SYS_RAWIO)) return -EPERM; error = get_user(ur_block, p); if (error) return error; if (ur_block < 0) return -EINVAL; block = ur_block; error = bmap(inode, &block); if (block > INT_MAX) { error = -ERANGE; pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n", current->comm, task_pid_nr(current), sb->s_id, filp); } if (error) ur_block = 0; else ur_block = block; if (put_user(ur_block, p)) error = -EFAULT; return error; } /** * fiemap_fill_next_extent - Fiemap helper function * @fieinfo: Fiemap context passed into ->fiemap * @logical: Extent logical start offset, in bytes * @phys: Extent physical start offset, in bytes * @len: Extent length, in bytes * @flags: FIEMAP_EXTENT flags that describe this extent * * Called from file system ->fiemap callback. Will populate extent * info as passed in via arguments and copy to user memory. On * success, extent count on fieinfo is incremented. * * Returns 0 on success, -errno on error, 1 if this was the last * extent that will fit in user array. */ #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { struct fiemap_extent extent; struct fiemap_extent __user *dest = fieinfo->fi_extents_start; /* only count the extents */ if (fieinfo->fi_extents_max == 0) { fieinfo->fi_extents_mapped++; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) return 1; if (flags & SET_UNKNOWN_FLAGS) flags |= FIEMAP_EXTENT_UNKNOWN; if (flags & SET_NO_UNMOUNTED_IO_FLAGS) flags |= FIEMAP_EXTENT_ENCODED; if (flags & SET_NOT_ALIGNED_FLAGS) flags |= FIEMAP_EXTENT_NOT_ALIGNED; memset(&extent, 0, sizeof(extent)); extent.fe_logical = logical; extent.fe_physical = phys; extent.fe_length = len; extent.fe_flags = flags; dest += fieinfo->fi_extents_mapped; if (copy_to_user(dest, &extent, sizeof(extent))) return -EFAULT; fieinfo->fi_extents_mapped++; if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) return 1; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } EXPORT_SYMBOL(fiemap_fill_next_extent); /** * fiemap_prep - check validity of requested flags for fiemap * @inode: Inode to operate on * @fieinfo: Fiemap context passed into ->fiemap * @start: Start of the mapped range * @len: Length of the mapped range, can be truncated by this function. * @supported_flags: Set of fiemap flags that the file system understands * * This function must be called from each ->fiemap instance to validate the * fiemap request against the file system parameters. * * Returns 0 on success, or a negative error on failure. */ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 *len, u32 supported_flags) { u64 maxbytes = inode->i_sb->s_maxbytes; u32 incompat_flags; int ret = 0; if (*len == 0) return -EINVAL; if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ if (*len > maxbytes || (maxbytes - *len) < start) *len = maxbytes - start; supported_flags |= FIEMAP_FLAG_SYNC; supported_flags &= FIEMAP_FLAGS_COMPAT; incompat_flags = fieinfo->fi_flags & ~supported_flags; if (incompat_flags) { fieinfo->fi_flags = incompat_flags; return -EBADR; } if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) ret = filemap_write_and_wait(inode->i_mapping); return ret; } EXPORT_SYMBOL(fiemap_prep); static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) { struct fiemap fiemap; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); int error; if (!inode->i_op->fiemap) return -EOPNOTSUPP; if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; } static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { struct fd src_file = fdget(srcfd); loff_t cloned; int ret; if (!src_file.file) return -EBADF; ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen, 0); if (cloned < 0) ret = cloned; else if (olen && cloned != olen) ret = -EINVAL; else ret = 0; fdput: fdput(src_file); return ret; } static long ioctl_file_clone_range(struct file *file, struct file_clone_range __user *argp) { struct file_clone_range args; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; return ioctl_file_clone(file, args.src_fd, args.src_offset, args.src_length, args.dest_offset); } #ifdef CONFIG_BLOCK static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) { return (offset >> inode->i_blkbits); } static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) { return (blk << inode->i_blkbits); } /** * __generic_block_fiemap - FIEMAP for block based inodes (no locking) * @inode: the inode to map * @fieinfo: the fiemap info struct that will be passed back to userspace * @start: where to start mapping in the inode * @len: how much space to map * @get_block: the fs's get_block function * * This does FIEMAP for block based inodes. Basically it will just loop * through get_block until we hit the number of extents we want to map, or we * go past the end of the file and hit a hole. * * If it is possible to have data blocks beyond a hole past @inode->i_size, then * please do not use this function, it will stop at the first unmapped block * beyond i_size. * * If you use this function directly, you need to do your own locking. Use * generic_block_fiemap if you want the locking done for you. */ static int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, get_block_t *get_block) { struct buffer_head map_bh; sector_t start_blk, last_blk; loff_t isize = i_size_read(inode); u64 logical = 0, phys = 0, size = 0; u32 flags = FIEMAP_EXTENT_MERGED; bool past_eof = false, whole_file = false; int ret = 0; ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC); if (ret) return ret; /* * Either the i_mutex or other appropriate locking needs to be held * since we expect isize to not change at all through the duration of * this call. */ if (len >= isize) { whole_file = true; len = isize; } /* * Some filesystems can't deal with being asked to map less than * blocksize, so make sure our len is at least block length. */ if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); start_blk = logical_to_blk(inode, start); last_blk = logical_to_blk(inode, start + len - 1); do { /* * we set b_size to the total size we want so it will map as * many contiguous blocks as possible at once */ memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; ret = get_block(inode, start_blk, &map_bh, 0); if (ret) break; /* HOLE */ if (!buffer_mapped(&map_bh)) { start_blk++; /* * We want to handle the case where there is an * allocated block at the front of the file, and then * nothing but holes up to the end of the file properly, * to make sure that extent at the front gets properly * marked with FIEMAP_EXTENT_LAST */ if (!past_eof && blk_to_logical(inode, start_blk) >= isize) past_eof = 1; /* * First hole after going past the EOF, this is our * last extent */ if (past_eof && size) { flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); } else if (size) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); size = 0; } /* if we have holes up to/past EOF then we're done */ if (start_blk > last_blk || past_eof || ret) break; } else { /* * We have gone over the length of what we wanted to * map, and it wasn't the entire file, so add the extent * we got last time and exit. * * This is for the case where say we want to map all the * way up to the second to the last block in a file, but * the last block is a hole, making the second to last * block FIEMAP_EXTENT_LAST. In this case we want to * see if there is a hole after the second to last block * so we can mark it properly. If we found data after * we exceeded the length we were requesting, then we * are good to go, just add the extent to the fieinfo * and break */ if (start_blk > last_blk && !whole_file) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); break; } /* * if size != 0 then we know we already have an extent * to add, so add it. */ if (size) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); if (ret) break; } logical = blk_to_logical(inode, start_blk); phys = blk_to_logical(inode, map_bh.b_blocknr); size = map_bh.b_size; flags = FIEMAP_EXTENT_MERGED; start_blk += logical_to_blk(inode, size); /* * If we are past the EOF, then we need to make sure as * soon as we find a hole that the last extent we found * is marked with FIEMAP_EXTENT_LAST */ if (!past_eof && logical + size >= isize) past_eof = true; } cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; break; } } while (1); /* If ret is 1 then we just hit the end of the extent array */ if (ret == 1) ret = 0; return ret; } /** * generic_block_fiemap - FIEMAP for block based inodes * @inode: The inode to map * @fieinfo: The mapping information * @start: The initial block to map * @len: The length of the extect to attempt to map * @get_block: The block mapping function for the fs * * Calls __generic_block_fiemap to map the inode, after taking * the inode's mutex lock. */ int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block) { int ret; inode_lock(inode); ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block); inode_unlock(inode); return ret; } EXPORT_SYMBOL(generic_block_fiemap); #endif /* CONFIG_BLOCK */ /* * This provides compatibility with legacy XFS pre-allocation ioctls * which predate the fallocate syscall. * * Only the l_start, l_len and l_whence fields of the 'struct space_resv' * are used here, rest are ignored. */ static int ioctl_preallocate(struct file *filp, int mode, void __user *argp) { struct inode *inode = file_inode(filp); struct space_resv sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += filp->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } /* on ia32 l_start is on a 32-bit boundary */ #if defined CONFIG_COMPAT && defined(CONFIG_X86_64) /* just account for different alignment */ static int compat_ioctl_preallocate(struct file *file, int mode, struct space_resv_32 __user *argp) { struct inode *inode = file_inode(file); struct space_resv_32 sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += file->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } #endif static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p) { switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); case FS_IOC_RESVSP: case FS_IOC_RESVSP64: return ioctl_preallocate(filp, 0, p); case FS_IOC_UNRESVSP: case FS_IOC_UNRESVSP64: return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p); case FS_IOC_ZERO_RANGE: return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p); } return -ENOIOCTLCMD; } static int ioctl_fionbio(struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = O_NONBLOCK; #ifdef __sparc__ /* SunOS compatibility item. */ if (O_NONBLOCK != O_NDELAY) flag |= O_NDELAY; #endif spin_lock(&filp->f_lock); if (on) filp->f_flags |= flag; else filp->f_flags &= ~flag; spin_unlock(&filp->f_lock); return error; } static int ioctl_fioasync(unsigned int fd, struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = on ? FASYNC : 0; /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } return error < 0 ? error : 0; } static int ioctl_fsfreeze(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* If filesystem doesn't support freeze feature, return. */ if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL) return -EOPNOTSUPP; /* Freeze */ if (sb->s_op->freeze_super) return sb->s_op->freeze_super(sb); return freeze_super(sb); } static int ioctl_fsthaw(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Thaw */ if (sb->s_op->thaw_super) return sb->s_op->thaw_super(sb); return thaw_super(sb); } static int ioctl_file_dedupe_range(struct file *file, struct file_dedupe_range __user *argp) { struct file_dedupe_range *same = NULL; int ret; unsigned long size; u16 count; if (get_user(count, &argp->dest_count)) { ret = -EFAULT; goto out; } size = offsetof(struct file_dedupe_range __user, info[count]); if (size > PAGE_SIZE) { ret = -ENOMEM; goto out; } same = memdup_user(argp, size); if (IS_ERR(same)) { ret = PTR_ERR(same); same = NULL; goto out; } same->dest_count = count; ret = vfs_dedupe_file_range(file, same); if (ret) goto out; ret = copy_to_user(argp, same, size); if (ret) ret = -EFAULT; out: kfree(same); return ret; } /* * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. * * When you add any new common ioctls to the switches above and below, * please ensure they have compatible arguments in compat mode. */ static int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct inode *inode = file_inode(filp); switch (cmd) { case FIOCLEX: set_close_on_exec(fd, 1); return 0; case FIONCLEX: set_close_on_exec(fd, 0); return 0; case FIONBIO: return ioctl_fionbio(filp, argp); case FIOASYNC: return ioctl_fioasync(fd, filp, argp); case FIOQSIZE: if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { loff_t res = inode_get_bytes(inode); return copy_to_user(argp, &res, sizeof(res)) ? -EFAULT : 0; } return -ENOTTY; case FIFREEZE: return ioctl_fsfreeze(filp); case FITHAW: return ioctl_fsthaw(filp); case FS_IOC_FIEMAP: return ioctl_fiemap(filp, argp); case FIGETBSZ: /* anon_bdev filesystems may not have a block size */ if (!inode->i_sb->s_blocksize) return -EINVAL; return put_user(inode->i_sb->s_blocksize, (int __user *)argp); case FICLONE: return ioctl_file_clone(filp, arg, 0, 0, 0); case FICLONERANGE: return ioctl_file_clone_range(filp, argp); case FIDEDUPERANGE: return ioctl_file_dedupe_range(filp, argp); case FIONREAD: if (!S_ISREG(inode->i_mode)) return vfs_ioctl(filp, cmd, arg); return put_user(i_size_read(inode) - filp->f_pos, (int __user *)argp); default: if (S_ISREG(inode->i_mode)) return file_ioctl(filp, cmd, argp); break; } return -ENOIOCTLCMD; } SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget(fd); int error; if (!f.file) return -EBADF; error = security_file_ioctl(f.file, cmd, arg); if (error) goto out; error = do_vfs_ioctl(f.file, fd, cmd, arg); if (error == -ENOIOCTLCMD) error = vfs_ioctl(f.file, cmd, arg); out: fdput(f); return error; } #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation * * This is not normally called as a function, but instead set in struct * file_operations as * * .compat_ioctl = compat_ptr_ioctl, * * On most architectures, the compat_ptr_ioctl() just passes all arguments * to the corresponding ->ioctl handler. The exception is arch/s390, where * compat_ptr() clears the top bit of a 32-bit pointer value, so user space * pointers to the second 2GB alias the first 2GB, as is the case for * native 32-bit s390 user space. * * The compat_ptr_ioctl() function must therefore be used only with ioctl * functions that either ignore the argument or pass a pointer to a * compatible data type. * * If any ioctl command handled by fops->unlocked_ioctl passes a plain * integer instead of a pointer, or any of the passed data types * is incompatible between 32-bit and 64-bit architectures, a proper * handler is required instead of compat_ptr_ioctl. */ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { if (!file->f_op->unlocked_ioctl) return -ENOIOCTLCMD; return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } EXPORT_SYMBOL(compat_ptr_ioctl); COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { struct fd f = fdget(fd); int error; if (!f.file) return -EBADF; /* RED-PEN how should LSM module know it's handling 32bit? */ error = security_file_ioctl(f.file, cmd, arg); if (error) goto out; switch (cmd) { /* FICLONE takes an int argument, so don't use compat_ptr() */ case FICLONE: error = ioctl_file_clone(f.file, arg, 0, 0, 0); break; #if defined(CONFIG_X86_64) /* these get messy on amd64 due to alignment differences */ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: error = compat_ioctl_preallocate(f.file, 0, compat_ptr(arg)); break; case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: error = compat_ioctl_preallocate(f.file, FALLOC_FL_PUNCH_HOLE, compat_ptr(arg)); break; case FS_IOC_ZERO_RANGE_32: error = compat_ioctl_preallocate(f.file, FALLOC_FL_ZERO_RANGE, compat_ptr(arg)); break; #endif /* * everything else in do_vfs_ioctl() takes either a compatible * pointer argument or no argument -- call it with a modified * argument. */ default: error = do_vfs_ioctl(f.file, fd, cmd, (unsigned long)compat_ptr(arg)); if (error != -ENOIOCTLCMD) break; if (f.file->f_op->compat_ioctl) error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; break; } out: fdput(f); return error; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_COMPAT_H #define _ASM_X86_COMPAT_H /* * Architecture specific compatibility types */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/user32.h> #include <asm/unistd.h> #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u32 __compat_uid32_t; typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { compat_dev_t st_dev; u16 __pad1; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; u32 st_blksize; u32 st_blocks; u32 st_atime; u32 st_atime_nsec; u32 st_mtime; u32 st_mtime_nsec; u32 st_ctime; u32 st_ctime_nsec; u32 __unused4; u32 __unused5; }; struct compat_flock { short l_type; short l_whence; compat_off_t l_start; compat_off_t l_len; compat_pid_t l_pid; }; #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 /* * IA32 uses 4 byte alignment for 64 bit quantities, * so we need to pack this structure. */ struct compat_flock64 { short l_type; short l_whence; compat_loff_t l_start; compat_loff_t l_len; compat_pid_t l_pid; } __attribute__((packed)); struct compat_statfs { int f_type; int f_bsize; int f_blocks; int f_bfree; int f_bavail; int f_files; int f_ffree; compat_fsid_t f_fsid; int f_namelen; /* SunOS ignores this field. */ int f_frsize; int f_flags; int f_spare[4]; }; #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ #define _COMPAT_NSIG 64 #define _COMPAT_NSIG_BPW 32 typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; __compat_gid32_t gid; __compat_uid32_t cuid; __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; unsigned short __pad2; compat_ulong_t unused1; compat_ulong_t unused2; }; struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; compat_ulong_t sem_otime; compat_ulong_t sem_otime_high; compat_ulong_t sem_ctime; compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; }; struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; compat_ulong_t msg_stime; compat_ulong_t msg_stime_high; compat_ulong_t msg_rtime; compat_ulong_t msg_rtime_high; compat_ulong_t msg_ctime; compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; compat_pid_t msg_lspid; compat_pid_t msg_lrpid; compat_ulong_t __unused4; compat_ulong_t __unused5; }; struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; compat_ulong_t shm_atime; compat_ulong_t shm_atime_high; compat_ulong_t shm_dtime; compat_ulong_t shm_dtime_high; compat_ulong_t shm_ctime; compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; compat_ulong_t __unused4; compat_ulong_t __unused5; }; /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ typedef struct user_regs_struct compat_elf_gregset_t; /* Full regset -- prstatus on x32, otherwise on ia32 */ #define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296) #define SET_PR_FPVALID(S, V, R) \ do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif static inline void __user *arch_compat_alloc_user_space(long len) { compat_uptr_t sp; if (test_thread_flag(TIF_IA32)) { sp = task_pt_regs(current)->sp; } else { /* -128 for the x32 ABI redzone */ sp = task_pt_regs(current)->sp - 128; } return (void __user *)round_down(sp - len, 16); } static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) return true; #endif return false; } static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } #ifdef CONFIG_COMPAT static inline bool in_compat_syscall(void) { return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ #define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; #ifdef CONFIG_X86_X32_ABI int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); #define copy_siginfo_to_user32 copy_siginfo_to_user32 #endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */
1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 // SPDX-License-Identifier: GPL-2.0-only #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/sched/debug.h> #include <xen/xen.h> #include <asm/fpu/internal.h> #include <asm/sev-es.h> #include <asm/traps.h> #include <asm/kdebug.h> typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int, unsigned long, unsigned long); static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } static inline ex_handler_t ex_fixup_handler(const struct exception_table_entry *x) { return (ex_handler_t)((unsigned long)&x->handler + x->handler); } __visible bool ex_handler_default(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { regs->ip = ex_fixup_addr(fixup); return true; } EXPORT_SYMBOL(ex_handler_default); __visible bool ex_handler_fault(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; return true; } EXPORT_SYMBOL_GPL(ex_handler_fault); /* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) * should always be valid. However, past bugs have allowed userspace to set * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). * These caused XRSTOR to fail when switching to the task, leaking the FPU * registers of the task previously executing on the CPU. Mitigate this class * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { regs->ip = ex_fixup_addr(fixup); WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); __copy_kernel_to_fpregs(&init_fpstate, -1); return true; } EXPORT_SYMBOL_GPL(ex_handler_fprestore); __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); regs->ip = ex_fixup_addr(fixup); return true; } EXPORT_SYMBOL(ex_handler_uaccess); __visible bool ex_handler_copy(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; return true; } EXPORT_SYMBOL(ex_handler_copy); __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. */ regs->ip = ex_fixup_addr(fixup); regs->ax = 0; regs->dx = 0; return true; } EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) show_stack_regs(regs); /* Pretend that the write succeeded. */ regs->ip = ex_fixup_addr(fixup); return true; } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); asm volatile ("mov %0, %%fs" : : "rm" (0)); return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_clear_fs); enum handler_type ex_get_fault_handler_type(unsigned long ip) { const struct exception_table_entry *e; ex_handler_t handler; e = search_exception_tables(ip); if (!e) return EX_HANDLER_NONE; handler = ex_fixup_handler(e); if (handler == ex_handler_fault) return EX_HANDLER_FAULT; else if (handler == ex_handler_uaccess || handler == ex_handler_copy) return EX_HANDLER_UACCESS; else return EX_HANDLER_OTHER; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; ex_handler_t handler; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; pnp_bios_is_utter_crap = 1; printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n"); __asm__ volatile( "movl %0, %%esp\n\t" "jmp *%1\n\t" : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); panic("do_trap: can't hit this"); } #endif e = search_exception_tables(regs->ip); if (!e) return 0; handler = ex_fixup_handler(e); return handler(e, regs, trapnr, error_code, fault_addr); } extern unsigned int early_recursion_flag; /* Restricted version used during very early boot */ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) return; if (early_recursion_flag > 2) goto halt_loop; /* * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. * Xen pv domains are not using the default __KERNEL_CS. */ if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* * The full exception fixup machinery is available as soon as * the early IDT is loaded. This means that it is the * responsibility of extable users to either function correctly * when handlers are invoked early or to simply avoid causing * exceptions before they're ready to handle them. * * This is better than filtering which handlers can be used, * because refusing to call a handler here is guaranteed to * result in a hard-to-debug panic. * * Keep in mind that not all vectors actually get here. Early * page faults, for example, are special. */ if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; if (trapnr == X86_TRAP_UD) { if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { /* Skip the ud2. */ regs->ip += LEN_UD2; return; } /* * If this was a BUG and report_bug returns or if this * was just a normal #UD, we want to continue onward and * crash. */ } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, regs->orig_ax, read_cr2()); show_regs(regs); halt_loop: while (true) halt(); }
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 /* SPDX-License-Identifier: GPL-2.0 */ /* * Security server interface. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> * */ #ifndef _SELINUX_SECURITY_H_ #define _SELINUX_SECURITY_H_ #include <linux/compiler.h> #include <linux/dcache.h> #include <linux/magic.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include "flask.h" #include "policycap.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ #define SECSID_WILD 0xffffffff /* wildcard SID */ #define SECCLASS_NULL 0x0000 /* no class */ /* Identify specific policy version changes */ #define POLICYDB_VERSION_BASE 15 #define POLICYDB_VERSION_BOOL 16 #define POLICYDB_VERSION_IPV6 17 #define POLICYDB_VERSION_NLCLASS 18 #define POLICYDB_VERSION_VALIDATETRANS 19 #define POLICYDB_VERSION_MLS 19 #define POLICYDB_VERSION_AVTAB 20 #define POLICYDB_VERSION_RANGETRANS 21 #define POLICYDB_VERSION_POLCAP 22 #define POLICYDB_VERSION_PERMISSIVE 23 #define POLICYDB_VERSION_BOUNDARY 24 #define POLICYDB_VERSION_FILENAME_TRANS 25 #define POLICYDB_VERSION_ROLETRANS 26 #define POLICYDB_VERSION_NEW_OBJECT_DEFAULTS 27 #define POLICYDB_VERSION_DEFAULT_TYPE 28 #define POLICYDB_VERSION_CONSTRAINT_NAMES 29 #define POLICYDB_VERSION_XPERMS_IOCTL 30 #define POLICYDB_VERSION_INFINIBAND 31 #define POLICYDB_VERSION_GLBLUB 32 #define POLICYDB_VERSION_COMP_FTRANS 33 /* compressed filename transitions */ /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #define POLICYDB_VERSION_MAX POLICYDB_VERSION_COMP_FTRANS /* Mask for just the mount related flags */ #define SE_MNTMASK 0x0f /* Super block security struct flags for mount options */ /* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 #define FSCONTEXT_MNT 0x02 #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 #define SBLABEL_MNT 0x10 /* Non-mount related flags */ #define SE_SBINITIALIZED 0x0100 #define SE_SBPROC 0x0200 #define SE_SBGENFS 0x0400 #define SE_SBGENFS_XATTR 0x0800 #define CONTEXT_STR "context" #define FSCONTEXT_STR "fscontext" #define ROOTCONTEXT_STR "rootcontext" #define DEFCONTEXT_STR "defcontext" #define SECLABEL_STR "seclabel" struct netlbl_lsm_secattr; extern int selinux_enabled_boot; /* * type_datum properties * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY */ #define TYPEDATUM_PROPERTY_PRIMARY 0x0001 #define TYPEDATUM_PROPERTY_ATTRIBUTE 0x0002 /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 struct selinux_avc; struct selinux_policy; struct selinux_state { #ifdef CONFIG_SECURITY_SELINUX_DISABLE bool disabled; #endif #ifdef CONFIG_SECURITY_SELINUX_DEVELOP bool enforcing; #endif bool checkreqprot; bool initialized; bool policycap[__POLICYDB_CAPABILITY_MAX]; struct page *status_page; struct mutex status_lock; struct selinux_avc *avc; struct selinux_policy __rcu *policy; struct mutex policy_mutex; } __randomize_layout; void selinux_avc_init(struct selinux_avc **avc); extern struct selinux_state selinux_state; static inline bool selinux_initialized(const struct selinux_state *state) { /* do a synchronized load to avoid race conditions */ return smp_load_acquire(&state->initialized); } static inline void selinux_mark_initialized(struct selinux_state *state) { /* do a synchronized write to avoid race conditions */ smp_store_release(&state->initialized, true); } #ifdef CONFIG_SECURITY_SELINUX_DEVELOP static inline bool enforcing_enabled(struct selinux_state *state) { return READ_ONCE(state->enforcing); } static inline void enforcing_set(struct selinux_state *state, bool value) { WRITE_ONCE(state->enforcing, value); } #else static inline bool enforcing_enabled(struct selinux_state *state) { return true; } static inline void enforcing_set(struct selinux_state *state, bool value) { } #endif static inline bool checkreqprot_get(const struct selinux_state *state) { return READ_ONCE(state->checkreqprot); } static inline void checkreqprot_set(struct selinux_state *state, bool value) { WRITE_ONCE(state->checkreqprot, value); } #ifdef CONFIG_SECURITY_SELINUX_DISABLE static inline bool selinux_disabled(struct selinux_state *state) { return READ_ONCE(state->disabled); } static inline void selinux_mark_disabled(struct selinux_state *state) { WRITE_ONCE(state->disabled, true); } #else static inline bool selinux_disabled(struct selinux_state *state) { return false; } #endif static inline bool selinux_policycap_netpeer(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NETPEER]); } static inline bool selinux_policycap_openperm(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_OPENPERM]); } static inline bool selinux_policycap_extsockclass(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]); } static inline bool selinux_policycap_alwaysnetwork(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]); } static inline bool selinux_policycap_cgroupseclabel(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]); } static inline bool selinux_policycap_nnp_nosuid_transition(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]); } static inline bool selinux_policycap_genfs_seclabel_symlinks(void) { struct selinux_state *state = &selinux_state; return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } struct selinux_policy_convert_data; struct selinux_load_state { struct selinux_policy *policy; struct selinux_policy_convert_data *convert_data; }; int security_mls_enabled(struct selinux_state *state); int security_load_policy(struct selinux_state *state, void *data, size_t len, struct selinux_load_state *load_state); void selinux_policy_commit(struct selinux_state *state, struct selinux_load_state *load_state); void selinux_policy_cancel(struct selinux_state *state, struct selinux_load_state *load_state); int security_read_policy(struct selinux_state *state, void **data, size_t *len); int security_policycap_supported(struct selinux_state *state, unsigned int req_cap); #define SEL_VEC_MAX 32 struct av_decision { u32 allowed; u32 auditallow; u32 auditdeny; u32 seqno; u32 flags; }; #define XPERMS_ALLOWED 1 #define XPERMS_AUDITALLOW 2 #define XPERMS_DONTAUDIT 4 #define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f)) #define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f))) struct extended_perms_data { u32 p[8]; }; struct extended_perms_decision { u8 used; u8 driver; struct extended_perms_data *allowed; struct extended_perms_data *auditallow; struct extended_perms_data *dontaudit; }; struct extended_perms { u16 len; /* length associated decision chain */ struct extended_perms_data drivers; /* flag drivers that are used */ }; /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 void security_compute_av(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd, struct extended_perms *xperms); void security_compute_xperms_decision(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u8 driver, struct extended_perms_decision *xpermd); void security_compute_av_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd); int security_transition_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); int security_transition_sid_user(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid); int security_member_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int security_change_sid(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); int security_sid_to_context(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_force(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_sid_to_context_inval(struct selinux_state *state, u32 sid, char **scontext, u32 *scontext_len); int security_context_to_sid(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, gfp_t gfp); int security_context_str_to_sid(struct selinux_state *state, const char *scontext, u32 *out_sid, gfp_t gfp); int security_context_to_sid_default(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); int security_context_to_sid_force(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *sid); int security_get_user_sids(struct selinux_state *state, u32 callsid, char *username, u32 **sids, u32 *nel); int security_port_sid(struct selinux_state *state, u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(struct selinux_state *state, u64 subnet_prefix, u16 pkey_num, u32 *out_sid); int security_ib_endport_sid(struct selinux_state *state, const char *dev_name, u8 port_num, u32 *out_sid); int security_netif_sid(struct selinux_state *state, char *name, u32 *if_sid); int security_node_sid(struct selinux_state *state, u16 domain, void *addr, u32 addrlen, u32 *out_sid); int security_validate_transition(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_validate_transition_user(struct selinux_state *state, u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); int security_bounded_transition(struct selinux_state *state, u32 oldsid, u32 newsid); int security_sid_mls_copy(struct selinux_state *state, u32 sid, u32 mls_sid, u32 *new_sid); int security_net_peersid_resolve(struct selinux_state *state, u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid); int security_get_classes(struct selinux_policy *policy, char ***classes, int *nclasses); int security_get_permissions(struct selinux_policy *policy, char *class, char ***perms, int *nperms); int security_get_reject_unknown(struct selinux_state *state); int security_get_allow_unknown(struct selinux_state *state); #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */ #define SECURITY_FS_USE_GENFS 4 /* use the genfs support */ #define SECURITY_FS_USE_NONE 5 /* no labeling support */ #define SECURITY_FS_USE_MNTPOINT 6 /* use mountpoint labeling */ #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ int security_fs_use(struct selinux_state *state, struct super_block *sb); int security_genfs_sid(struct selinux_state *state, const char *fstype, char *name, u16 sclass, u32 *sid); int selinux_policy_genfs_sid(struct selinux_policy *policy, const char *fstype, char *name, u16 sclass, u32 *sid); #ifdef CONFIG_NETLABEL int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid); int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr); #else static inline int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid) { return -EIDRM; } static inline int security_netlbl_sid_to_secattr(struct selinux_state *state, u32 sid, struct netlbl_lsm_secattr *secattr) { return -ENOENT; } #endif /* CONFIG_NETLABEL */ const char *security_get_initial_sid_context(u32 sid); /* * status notifier using mmap interface */ extern struct page *selinux_kernel_status_page(struct selinux_state *state); #define SELINUX_KERNEL_STATUS_VERSION 1 struct selinux_kernel_status { u32 version; /* version number of thie structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ /* * The version > 0 supports above members. */ } __packed; extern void selinux_status_update_setenforce(struct selinux_state *state, int enforcing); extern void selinux_status_update_policyload(struct selinux_state *state, int seqno); extern void selinux_complete_init(void); extern int selinux_disable(struct selinux_state *state); extern void exit_sel_fs(void); extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern void avtab_cache_init(void); extern void ebitmap_cache_init(void); extern void hashtab_cache_init(void); extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); #endif /* _SELINUX_SECURITY_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 /* SPDX-License-Identifier: GPL-2.0 */ /* Rewritten and vastly simplified by Rusty Russell for in-kernel * module loader: * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ #ifndef _LINUX_KALLSYMS_H #define _LINUX_KALLSYMS_H #include <linux/errno.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/sections.h> #define KSYM_NAME_LEN 128 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr <= (unsigned long)_einittext) return 1; return 0; } static inline int is_kernel_text(unsigned long addr) { if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || arch_is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) return 1; return in_gate_area_no_mm(addr); } static inline int is_ksym_addr(unsigned long addr) { if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) return is_kernel(addr); return is_kernel_text(addr) || is_kernel_inittext(addr); } static inline void *dereference_symbol_descriptor(void *ptr) { #ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); if (is_ksym_addr((unsigned long)ptr)) return ptr; preempt_disable(); mod = __module_address((unsigned long)ptr); preempt_enable(); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); #endif return ptr; } #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); /* Call a function on each kallsyms symbol in the core kernel */ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); /* Lookup an address. modname is set to NULL if it's in the kernel. */ const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ static inline unsigned long kallsyms_lookup_name(const char *name) { return 0; } static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { return 0; } static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { return 0; } static inline const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int sprint_symbol(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int lookup_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { return -ERANGE; } static inline bool kallsyms_show_value(const struct cred *cred) { return false; } #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) { printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 // SPDX-License-Identifier: GPL-2.0-only /* File: fs/xattr.c Extended attribute handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #include <linux/fs.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/xattr.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/evm.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/fsnotify.h> #include <linux/audit.h> #include <linux/vmalloc.h> #include <linux/posix_acl_xattr.h> #include <linux/uaccess.h> static const char * strcmp_prefix(const char *a, const char *a_prefix) { while (*a_prefix && *a == *a_prefix) { a++; a_prefix++; } return *a_prefix ? NULL : a; } /* * In order to implement different sets of xattr operations for each xattr * prefix, a filesystem should create a null-terminated array of struct * xattr_handler (one for each prefix) and hang a pointer to it off of the * s_xattr field of the superblock. */ #define for_each_xattr_handler(handlers, handler) \ if (handlers) \ for ((handler) = *(handlers)++; \ (handler) != NULL; \ (handler) = *(handlers)++) /* * Find the xattr_handler with the matching prefix. */ static const struct xattr_handler * xattr_resolve_name(struct inode *inode, const char **name) { const struct xattr_handler **handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; if (!(inode->i_opflags & IOP_XATTR)) { if (unlikely(is_bad_inode(inode))) return ERR_PTR(-EIO); return ERR_PTR(-EOPNOTSUPP); } for_each_xattr_handler(handlers, handler) { const char *n; n = strcmp_prefix(*name, xattr_prefix(handler)); if (n) { if (!handler->prefix ^ !*n) { if (*n) continue; return ERR_PTR(-EINVAL); } *name = n; return handler; } } return ERR_PTR(-EOPNOTSUPP); } /* * Check permissions for extended attribute access. This is a bit complicated * because different namespaces have very different rules. */ static int xattr_permission(struct inode *inode, const char *name, int mask) { /* * We can never set or remove an extended attribute on a read-only * filesystem or on an immutable / append-only inode. */ if (mask & MAY_WRITE) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; /* * Updating an xattr will likely cause i_uid and i_gid * to be writen back improperly if their true value is * unknown to the vfs. */ if (HAS_UNMAPPED_ID(inode)) return -EPERM; } /* * No restriction for security.* and system.* from the VFS. Decision * on these is left to the underlying filesystem / security module. */ if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return 0; /* * The trusted.* namespace can only be accessed by privileged users. */ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { if (!capable(CAP_SYS_ADMIN)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; return 0; } /* * In the user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) return -EPERM; } return inode_permission(inode, mask); } /* * Look for any handler that deals with the specified namespace. */ int xattr_supported_namespace(struct inode *inode, const char *prefix) { const struct xattr_handler **handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; size_t preflen; if (!(inode->i_opflags & IOP_XATTR)) { if (unlikely(is_bad_inode(inode))) return -EIO; return -EOPNOTSUPP; } preflen = strlen(prefix); for_each_xattr_handler(handlers, handler) { if (!strncmp(xattr_prefix(handler), prefix, preflen)) return 0; } return -EOPNOTSUPP; } EXPORT_SYMBOL(xattr_supported_namespace); int __vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { const struct xattr_handler *handler; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->set) return -EOPNOTSUPP; if (size == 0) value = ""; /* empty EA, do not remove */ return handler->set(handler, dentry, inode, name, value, size, flags); } EXPORT_SYMBOL(__vfs_setxattr); /** * __vfs_setxattr_noperm - perform setxattr operation without performing * permission checks. * * @dentry - object to perform setxattr on * @name - xattr name to set * @value - value to set @name to * @size - size of @value * @flags - flags to pass into filesystem operations * * returns the result of the internal setxattr or setsecurity operations. * * This function requires the caller to lock the inode's i_mutex before it * is executed. It also assumes that the caller will make the appropriate * permission checks. */ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); if (issec) inode->i_flags &= ~S_NOSEC; if (inode->i_opflags & IOP_XATTR) { error = __vfs_setxattr(dentry, inode, name, value, size, flags); if (!error) { fsnotify_xattr(dentry); security_inode_post_setxattr(dentry, name, value, size, flags); } } else { if (unlikely(is_bad_inode(inode))) return -EIO; } if (error == -EAGAIN) { error = -EOPNOTSUPP; if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); if (!error) fsnotify_xattr(dentry); } } return error; } /** * __vfs_setxattr_locked - set an extended attribute while holding the inode * lock * * @dentry: object to perform setxattr on * @name: xattr name to set * @value: value to set @name to * @size: size of @value * @flags: flags to pass into filesystem operations * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int __vfs_setxattr_locked(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(inode, name, MAY_WRITE); if (error) return error; error = security_inode_setxattr(dentry, name, value, size, flags); if (error) goto out; error = try_break_deleg(inode, delegated_inode); if (error) goto out; error = __vfs_setxattr_noperm(dentry, name, value, size, flags); out: return error; } EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); int vfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; struct inode *delegated_inode = NULL; int error; retry_deleg: inode_lock(inode); error = __vfs_setxattr_locked(dentry, name, value, size, flags, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_setxattr); static ssize_t xattr_getsecurity(struct inode *inode, const char *name, void *value, size_t size) { void *buffer = NULL; ssize_t len; if (!value || !size) { len = security_inode_getsecurity(inode, name, &buffer, false); goto out_noalloc; } len = security_inode_getsecurity(inode, name, &buffer, true); if (len < 0) return len; if (size < len) { len = -ERANGE; goto out; } memcpy(value, buffer, len); out: kfree(buffer); out_noalloc: return len; } /* * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr * * Allocate memory, if not already allocated, or re-allocate correct size, * before retrieving the extended attribute. * * Returns the result of alloc, if failed, or the getxattr operation. */ ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t xattr_size, gfp_t flags) { const struct xattr_handler *handler; struct inode *inode = dentry->d_inode; char *value = *xattr_value; int error; error = xattr_permission(inode, name, MAY_READ); if (error) return error; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->get) return -EOPNOTSUPP; error = handler->get(handler, dentry, inode, name, NULL, 0); if (error < 0) return error; if (!value || (error > xattr_size)) { value = krealloc(*xattr_value, error + 1, flags); if (!value) return -ENOMEM; memset(value, 0, error + 1); } error = handler->get(handler, dentry, inode, name, value, error); *xattr_value = value; return error; } ssize_t __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { const struct xattr_handler *handler; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->get) return -EOPNOTSUPP; return handler->get(handler, dentry, inode, name, value, size); } EXPORT_SYMBOL(__vfs_getxattr); ssize_t vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(inode, name, MAY_READ); if (error) return error; error = security_inode_getxattr(dentry, name); if (error) return error; if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; int ret = xattr_getsecurity(inode, suffix, value, size); /* * Only overwrite the return value if a security module * is actually active. */ if (ret == -EOPNOTSUPP) goto nolsm; return ret; } nolsm: return __vfs_getxattr(dentry, inode, name, value, size); } EXPORT_SYMBOL_GPL(vfs_getxattr); ssize_t vfs_listxattr(struct dentry *dentry, char *list, size_t size) { struct inode *inode = d_inode(dentry); ssize_t error; error = security_inode_listxattr(dentry); if (error) return error; if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) { error = inode->i_op->listxattr(dentry, list, size); } else { error = security_inode_listsecurity(inode, list, size); if (size && error > size) error = -ERANGE; } return error; } EXPORT_SYMBOL_GPL(vfs_listxattr); int __vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = d_inode(dentry); const struct xattr_handler *handler; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->set) return -EOPNOTSUPP; return handler->set(handler, dentry, inode, name, NULL, 0, XATTR_REPLACE); } EXPORT_SYMBOL(__vfs_removexattr); /** * __vfs_removexattr_locked - set an extended attribute while holding the inode * lock * * @dentry: object to perform setxattr on * @name: name of xattr to remove * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int __vfs_removexattr_locked(struct dentry *dentry, const char *name, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(inode, name, MAY_WRITE); if (error) return error; error = security_inode_removexattr(dentry, name); if (error) goto out; error = try_break_deleg(inode, delegated_inode); if (error) goto out; error = __vfs_removexattr(dentry, name); if (!error) { fsnotify_xattr(dentry); evm_inode_post_removexattr(dentry, name); } out: return error; } EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); int vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; struct inode *delegated_inode = NULL; int error; retry_deleg: inode_lock(inode); error = __vfs_removexattr_locked(dentry, name, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_removexattr); /* * Extended attribute SET operations */ static long setxattr(struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { int error; void *kvalue = NULL; char kname[XATTR_NAME_MAX + 1]; if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) return -EINVAL; error = strncpy_from_user(kname, name, sizeof(kname)); if (error == 0 || error == sizeof(kname)) error = -ERANGE; if (error < 0) return error; if (size) { if (size > XATTR_SIZE_MAX) return -E2BIG; kvalue = kvmalloc(size, GFP_KERNEL); if (!kvalue) return -ENOMEM; if (copy_from_user(kvalue, value, size)) { error = -EFAULT; goto out; } if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) posix_acl_fix_xattr_from_user(kvalue, size); else if (strcmp(kname, XATTR_NAME_CAPS) == 0) { error = cap_convert_nscap(d, &kvalue, size); if (error < 0) goto out; size = error; } } error = vfs_setxattr(d, kname, kvalue, size, flags); out: kvfree(kvalue); return error; } static int path_setxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags, unsigned int lookup_flags) { struct path path; int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { error = setxattr(path.dentry, name, value, size, flags); mnt_drop_write(path.mnt); } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE5(setxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW); } SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { return path_setxattr(pathname, name, value, size, flags, 0); } SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) return error; audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { error = setxattr(f.file->f_path.dentry, name, value, size, flags); mnt_drop_write_file(f.file); } fdput(f); return error; } /* * Extended attribute GET operations */ static ssize_t getxattr(struct dentry *d, const char __user *name, void __user *value, size_t size) { ssize_t error; void *kvalue = NULL; char kname[XATTR_NAME_MAX + 1]; error = strncpy_from_user(kname, name, sizeof(kname)); if (error == 0 || error == sizeof(kname)) error = -ERANGE; if (error < 0) return error; if (size) { if (size > XATTR_SIZE_MAX) size = XATTR_SIZE_MAX; kvalue = kvzalloc(size, GFP_KERNEL); if (!kvalue) return -ENOMEM; } error = vfs_getxattr(d, kname, kvalue, size); if (error > 0) { if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) posix_acl_fix_xattr_to_user(kvalue, error); if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { /* The file system tried to returned a value bigger than XATTR_SIZE_MAX bytes. Not possible. */ error = -E2BIG; } kvfree(kvalue); return error; } static ssize_t path_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = getxattr(path.dentry, name, value, size); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE4(getxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW); } SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { return path_getxattr(pathname, name, value, size, 0); } SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; audit_file(f.file); error = getxattr(f.file->f_path.dentry, name, value, size); fdput(f); return error; } /* * Extended attribute LIST operations */ static ssize_t listxattr(struct dentry *d, char __user *list, size_t size) { ssize_t error; char *klist = NULL; if (size) { if (size > XATTR_LIST_MAX) size = XATTR_LIST_MAX; klist = kvmalloc(size, GFP_KERNEL); if (!klist) return -ENOMEM; } error = vfs_listxattr(d, klist, size); if (error > 0) { if (size && copy_to_user(list, klist, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { /* The file system tried to returned a list bigger than XATTR_LIST_MAX bytes. Not possible. */ error = -E2BIG; } kvfree(klist); return error; } static ssize_t path_listxattr(const char __user *pathname, char __user *list, size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = listxattr(path.dentry, list, size); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, size_t, size) { return path_listxattr(pathname, list, size, LOOKUP_FOLLOW); } SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, size_t, size) { return path_listxattr(pathname, list, size, 0); } SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; audit_file(f.file); error = listxattr(f.file->f_path.dentry, list, size); fdput(f); return error; } /* * Extended attribute REMOVE operations */ static long removexattr(struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; error = strncpy_from_user(kname, name, sizeof(kname)); if (error == 0 || error == sizeof(kname)) error = -ERANGE; if (error < 0) return error; return vfs_removexattr(d, kname); } static int path_removexattr(const char __user *pathname, const char __user *name, unsigned int lookup_flags) { struct path path; int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { error = removexattr(path.dentry, name); mnt_drop_write(path.mnt); } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE2(removexattr, const char __user *, pathname, const char __user *, name) { return path_removexattr(pathname, name, LOOKUP_FOLLOW); } SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, const char __user *, name) { return path_removexattr(pathname, name, 0); } SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) return error; audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { error = removexattr(f.file->f_path.dentry, name); mnt_drop_write_file(f.file); } fdput(f); return error; } /* * Combine the results of the list() operation from every xattr_handler in the * list. */ ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; unsigned int size = 0; if (!buffer) { for_each_xattr_handler(handlers, handler) { if (!handler->name || (handler->list && !handler->list(dentry))) continue; size += strlen(handler->name) + 1; } } else { char *buf = buffer; size_t len; for_each_xattr_handler(handlers, handler) { if (!handler->name || (handler->list && !handler->list(dentry))) continue; len = strlen(handler->name); if (len + 1 > buffer_size) return -ERANGE; memcpy(buf, handler->name, len + 1); buf += len + 1; buffer_size -= len + 1; } size = buf - buffer; } return size; } EXPORT_SYMBOL(generic_listxattr); /** * xattr_full_name - Compute full attribute name from suffix * * @handler: handler of the xattr_handler operation * @name: name passed to the xattr_handler operation * * The get and set xattr handler operations are called with the remainder of * the attribute name after skipping the handler's prefix: for example, "foo" * is passed to the get operation of a handler with prefix "user." to get * attribute "user.foo". The full name is still "there" in the name though. * * Note: the list xattr handler operation when called from the vfs is passed a * NULL name; some file systems use this operation internally, with varying * semantics. */ const char *xattr_full_name(const struct xattr_handler *handler, const char *name) { size_t prefix_len = strlen(xattr_prefix(handler)); return name - prefix_len; } EXPORT_SYMBOL(xattr_full_name); /* * Allocate new xattr and copy in the value; but leave the name to callers. */ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) { struct simple_xattr *new_xattr; size_t len; /* wrap around? */ len = sizeof(*new_xattr) + size; if (len < sizeof(*new_xattr)) return NULL; new_xattr = kvmalloc(len, GFP_KERNEL); if (!new_xattr) return NULL; new_xattr->size = size; memcpy(new_xattr->value, value, size); return new_xattr; } /* * xattr GET operation for in-memory/pseudo filesystems */ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size) { struct simple_xattr *xattr; int ret = -ENODATA; spin_lock(&xattrs->lock); list_for_each_entry(xattr, &xattrs->head, list) { if (strcmp(name, xattr->name)) continue; ret = xattr->size; if (buffer) { if (size < xattr->size) ret = -ERANGE; else memcpy(buffer, xattr->value, xattr->size); } break; } spin_unlock(&xattrs->lock); return ret; } /** * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems * @xattrs: target simple_xattr list * @name: name of the extended attribute * @value: value of the xattr. If %NULL, will remove the attribute. * @size: size of the new xattr * @flags: %XATTR_{CREATE|REPLACE} * @removed_size: returns size of the removed xattr, -1 if none removed * * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist; * otherwise, fails with -ENODATA. * * Returns 0 on success, -errno on failure. */ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags, ssize_t *removed_size) { struct simple_xattr *xattr; struct simple_xattr *new_xattr = NULL; int err = 0; if (removed_size) *removed_size = -1; /* value == NULL means remove */ if (value) { new_xattr = simple_xattr_alloc(value, size); if (!new_xattr) return -ENOMEM; new_xattr->name = kstrdup(name, GFP_KERNEL); if (!new_xattr->name) { kvfree(new_xattr); return -ENOMEM; } } spin_lock(&xattrs->lock); list_for_each_entry(xattr, &xattrs->head, list) { if (!strcmp(name, xattr->name)) { if (flags & XATTR_CREATE) { xattr = new_xattr; err = -EEXIST; } else if (new_xattr) { list_replace(&xattr->list, &new_xattr->list); if (removed_size) *removed_size = xattr->size; } else { list_del(&xattr->list); if (removed_size) *removed_size = xattr->size; } goto out; } } if (flags & XATTR_REPLACE) { xattr = new_xattr; err = -ENODATA; } else { list_add(&new_xattr->list, &xattrs->head); xattr = NULL; } out: spin_unlock(&xattrs->lock); if (xattr) { kfree(xattr->name); kvfree(xattr); } return err; } static bool xattr_is_trusted(const char *name) { return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } static int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name) { size_t len = strlen(name) + 1; if (*buffer) { if (*remaining_size < len) return -ERANGE; memcpy(*buffer, name, len); *buffer += len; } *remaining_size -= len; return 0; } /* * xattr LIST operation for in-memory/pseudo filesystems */ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size) { bool trusted = capable(CAP_SYS_ADMIN); struct simple_xattr *xattr; ssize_t remaining_size = size; int err = 0; #ifdef CONFIG_FS_POSIX_ACL if (IS_POSIXACL(inode)) { if (inode->i_acl) { err = xattr_list_one(&buffer, &remaining_size, XATTR_NAME_POSIX_ACL_ACCESS); if (err) return err; } if (inode->i_default_acl) { err = xattr_list_one(&buffer, &remaining_size, XATTR_NAME_POSIX_ACL_DEFAULT); if (err) return err; } } #endif spin_lock(&xattrs->lock); list_for_each_entry(xattr, &xattrs->head, list) { /* skip "trusted." attributes for unprivileged callers */ if (!trusted && xattr_is_trusted(xattr->name)) continue; err = xattr_list_one(&buffer, &remaining_size, xattr->name); if (err) break; } spin_unlock(&xattrs->lock); return err ? err : size - remaining_size; } /* * Adds an extended attribute to the list */ void simple_xattr_list_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr) { spin_lock(&xattrs->lock); list_add(&new_xattr->list, &xattrs->head); spin_unlock(&xattrs->lock); }
1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/major.h> #include <linux/genhd.h> #include <linux/list.h> #include <linux/llist.h> #include <linux/minmax.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/pagemap.h> #include <linux/backing-dev-defs.h> #include <linux/wait.h> #include <linux/mempool.h> #include <linux/pfn.h> #include <linux/bio.h> #include <linux/stringify.h> #include <linux/gfp.h> #include <linux/bsg.h> #include <linux/smp.h> #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> #include <linux/scatterlist.h> #include <linux/blkzoned.h> #include <linux/pm.h> struct module; struct scsi_ioctl_command; struct request_queue; struct elevator_queue; struct blk_trace; struct request; struct sg_io_hdr; struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; struct blk_keyslot_manager; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 /* Doing classic polling */ #define BLK_MQ_POLL_CLASSIC -1 /* * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ #define BLKCG_MAX_POLS 5 static inline int blk_validate_block_size(unsigned int bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) return -EINVAL; return 0; } typedef void (rq_end_io_fn)(struct request *, blk_status_t); /* * request flags */ typedef __u32 __bitwise req_flags_t; /* elevator knows about this request */ #define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) /* may not be passed by ioscheduler */ #define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) /* track inflight for MQ */ #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* elevator private data attached */ #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) /* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* request came from our alloc pool */ #define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) /* runtime pm request */ #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) /* track IO completion time */ #define RQF_STATS ((__force req_flags_t)(1 << 17)) /* Look at ->special_vec for the actual data payload instead of the bio chain. */ #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) /* The per-zone write lock is held for this request */ #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* already slept for hybrid poll */ #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) /* * Request state for blk-mq. */ enum mq_rq_state { MQ_RQ_IDLE = 0, MQ_RQ_IN_FLIGHT = 1, MQ_RQ_COMPLETE = 2, }; /* * Try to put the fields that are referenced together in the same cacheline. * * If you modify this structure, make sure to update blk_rq_init() and * especially blk_mq_rq_ctx_init() to take care of the added fields. */ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; int internal_tag; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ sector_t __sector; /* sector cursor */ struct bio *bio; struct bio *biotail; struct list_head queuelist; /* * The hash is used inside the scheduler, and killed once the * request reaches the dispatch list. The ipi_list is only used * to queue the request for softirq completion, which is long * after the request has been unhashed (and even removed from * the dispatch list). */ union { struct hlist_node hash; /* merge hash */ struct list_head ipi_list; }; /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. So let the * completion_data share space with the rb_node. */ union { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; void *completion_data; int error_count; /* for legacy drivers, don't use */ }; /* * Three pointers are available for the IO schedulers, if they need * more they have to dynamically allocate it. Flush requests are * never put on the IO scheduler. So let the flush fields share * space with the elevator data. */ union { struct { struct io_cq *icq; void *priv[2]; } elv; struct { unsigned int seq; struct list_head list; rq_end_io_fn *saved_end_io; } flush; }; struct gendisk *rq_disk; struct hd_struct *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; #endif /* Time that this request was allocated for this IO. */ u64 start_time_ns; /* Time that I/O was submitted to the device. */ u64 io_start_time_ns; #ifdef CONFIG_BLK_WBT unsigned short wbt_flags; #endif /* * rq sectors used for blk stats. It has the same value * with blk_rq_sectors(rq), except that it never be zeroed * by completion. */ unsigned short stats_sectors; /* * Number of scatter-gather DMA addr+len pairs after * physical address coalescing is performed. */ unsigned short nr_phys_segments; #if defined(CONFIG_BLK_DEV_INTEGRITY) unsigned short nr_integrity_segments; #endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; struct blk_ksm_keyslot *crypt_keyslot; #endif unsigned short write_hint; unsigned short ioprio; enum mq_rq_state state; refcount_t ref; unsigned int timeout; unsigned long deadline; union { struct __call_single_data csd; u64 fifo_time; }; /* * completion callback. */ rq_end_io_fn *end_io; void *end_io_data; }; static inline bool blk_op_is_scsi(unsigned int op) { return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; } static inline bool blk_op_is_private(unsigned int op) { return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } static inline bool blk_rq_is_scsi(struct request *rq) { return blk_op_is_scsi(req_op(rq)); } static inline bool blk_rq_is_private(struct request *rq) { return blk_op_is_private(req_op(rq)); } static inline bool blk_rq_is_passthrough(struct request *rq) { return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } static inline bool bio_is_passthrough(struct bio *bio) { unsigned op = bio_op(bio); return blk_op_is_scsi(op) || blk_op_is_private(op); } static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; } #include <linux/elevator.h> struct blk_queue_ctx; struct bio_vec; enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ BLK_EH_RESET_TIMER, /* reset timer and try again */ }; enum blk_queue_state { Queue_down, Queue_up, }; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) /* * Zoned block device models (zoned limit). * * Note: This needs to be ordered from the least to the most severe * restrictions for the inheritance in blk_stack_limits() to work. */ enum blk_zoned_model { BLK_ZONED_NONE = 0, /* Regular block device */ BLK_ZONED_HA, /* Host-aware zoned block device */ BLK_ZONED_HM, /* Host-managed zoned block device */ }; struct queue_limits { unsigned long bounce_pfn; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; unsigned int max_hw_sectors; unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; unsigned int max_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blkdev_nr_zones(struct gendisk *disk) { return 0; } static inline int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { return -ENOTTY; } static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { return -ENOTTY; } #endif /* CONFIG_BLK_DEV_ZONED */ struct request_queue { struct request *last_merge; struct elevator_queue *elevator; struct percpu_ref q_usage_counter; struct blk_queue_stats *stats; struct rq_qos *rq_qos; const struct blk_mq_ops *mq_ops; /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; unsigned int queue_depth; /* hw dispatch queues */ struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; struct backing_dev_info *backing_dev_info; /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. */ void *queuedata; /* * various queue flags, see QUEUE_* below */ unsigned long queue_flags; /* * Number of contexts that have called blk_set_pm_only(). If this * counter is above zero then only RQF_PM requests are processed. */ atomic_t pm_only; /* * ida allocated id for this queue. Used to index queues from * ioctx. */ int id; /* * queue needs bounce pages for pages above this limit */ gfp_t bounce_gfp; spinlock_t queue_lock; /* * queue kobject */ struct kobject kobj; /* * mq queue kobject */ struct kobject *mq_kobj; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; #endif /* CONFIG_BLK_DEV_INTEGRITY */ #ifdef CONFIG_PM struct device *dev; enum rpm_status rpm_status; unsigned int nr_pending; #endif /* * queue settings */ unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION /* Inline crypto capabilities */ struct blk_keyslot_manager *ksm; #endif unsigned int rq_timeout; int poll_nsec; struct blk_stat_callback *poll_cb; struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS]; struct timer_list timeout; struct work_struct timeout_work; atomic_t nr_active_requests_shared_sbitmap; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); struct blkcg_gq *root_blkg; struct list_head blkg_list; #endif struct queue_limits limits; unsigned int required_elevator_features; #ifdef CONFIG_BLK_DEV_ZONED /* * Zoned block device information for request dispatch control. * nr_zones is the total number of zones of the device. This is always * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones * bits which indicates if a zone is conventional (bit set) or * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones * bits which indicates if a zone is write locked, that is, if a write * request targeting the zone was dispatched. All three fields are * initialized by the low level device driver (e.g. scsi/sd.c). * Stacking drivers (device mappers) may or may not initialize * these fields. * * Reads of this information must be protected with blk_queue_enter() / * blk_queue_exit(). Modifying this information is only allowed while * no requests are being processed. See also blk_mq_freeze_queue() and * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; unsigned int max_open_zones; unsigned int max_active_zones; #endif /* CONFIG_BLK_DEV_ZONED */ /* * sg stuff */ unsigned int sg_timeout; unsigned int sg_reserved_size; int node; struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif /* * for flush operations */ struct blk_flush_queue *fq; struct list_head requeue_list; spinlock_t requeue_lock; struct delayed_work requeue_work; struct mutex sysfs_lock; struct mutex sysfs_dir_lock; /* * for reusing dead hctx instance in case of updating * nr_hw_queues */ struct list_head unused_hctx_list; spinlock_t unused_hctx_lock; int mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif #ifdef CONFIG_BLK_DEV_THROTTLING /* Throttle data */ struct throtl_data *td; #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; /* * Protect concurrent access to q_usage_counter by * percpu_ref_kill() and percpu_ref_reinit(). */ struct mutex mq_freeze_lock; struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set bio_split; struct dentry *debugfs_dir; #ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; #endif bool mq_sysfs_init_done; size_t cmd_size; #define BLK_MAX_WRITE_HINTS 5 u64 write_hints[BLK_MAX_WRITE_HINTS]; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_DISCARD 8 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SECERASE 11 /* supports secure erase */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_stable_writes(q) \ test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) #define blk_queue_scsi_passthrough(q) \ test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME #define blk_queue_rq_alloc_time(q) \ test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) #else #define blk_queue_rq_alloc_time(q) false #endif #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) #define dma_map_bvec(dev, bv, dir, attrs) \ dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; } #ifdef CONFIG_PM static inline enum rpm_status queue_rpm_status(struct request_queue *q) { return q->rpm_status; } #else static inline enum rpm_status queue_rpm_status(struct request_queue *q) { return RPM_ACTIVE; } #endif static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return q->limits.zoned; return BLK_ZONED_NONE; } static inline bool blk_queue_is_zoned(struct request_queue *q) { switch (blk_queue_zoned_model(q)) { case BLK_ZONED_HA: case BLK_ZONED_HM: return true; default: return false; } } static inline sector_t blk_queue_zone_sectors(struct request_queue *q) { return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { return blk_queue_is_zoned(q) ? q->nr_zones : 0; } static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { if (!blk_queue_is_zoned(q)) return 0; return sector >> ilog2(q->limits.chunk_sectors); } static inline bool blk_queue_zone_is_seq(struct request_queue *q, sector_t sector) { if (!blk_queue_is_zoned(q)) return false; if (!q->conv_zones_bitmap) return true; return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } static inline void blk_queue_max_open_zones(struct request_queue *q, unsigned int max_open_zones) { q->max_open_zones = max_open_zones; } static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return q->max_open_zones; } static inline void blk_queue_max_active_zones(struct request_queue *q, unsigned int max_active_zones) { q->max_active_zones = max_active_zones; } static inline unsigned int queue_max_active_zones(const struct request_queue *q) { return q->max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { return 0; } static inline bool blk_queue_zone_is_seq(struct request_queue *q, sector_t sector) { return false; } static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { return 0; } static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return 0; } static inline unsigned int queue_max_active_zones(const struct request_queue *q) { return 0; } #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) { return op_is_sync(rq->cmd_flags); } static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) return false; if (req_op(rq) == REQ_OP_WRITE_ZEROES) return false; if (req_op(rq) == REQ_OP_ZONE_APPEND) return false; if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) return false; return true; } static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) { if (bio_page(a) == bio_page(b) && bio_offset(a) == bio_offset(b)) return true; return false; } static inline unsigned int blk_queue_depth(struct request_queue *q) { if (q->queue_depth) return q->queue_depth; return q->nr_requests; } extern unsigned long blk_max_low_pfn, blk_max_pfn; /* * standard bounce addresses: * * BLK_BOUNCE_HIGH : bounce all highmem pages * BLK_BOUNCE_ANY : don't bounce anything * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary */ #if BITS_PER_LONG == 32 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) #else #define BLK_BOUNCE_HIGH -1ULL #endif #define BLK_BOUNCE_ANY (-1ULL) #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) /* * default timeout for SG_IO if none specified */ #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) #define BLK_MIN_SG_TIMEOUT (7 * HZ) struct rq_map_data { struct page **pages; int page_order; int nr_entries; unsigned long offset; int null_mapped; int from_user; }; struct req_iterator { struct bvec_iter iter; struct bio *bio; }; /* This should not be used directly - use rq_for_each_segment */ #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) #define rq_for_each_segment(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_segment(bvl, _iter.bio, _iter.iter) #define rq_for_each_bvec(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_bvec(bvl, _iter.bio, _iter.iter) #define rq_iter_last(bvec, _iter) \ (_iter.bio->bi_next == NULL && \ bio_iter_last(bvec, _iter.iter)) #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" #endif #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE extern void rq_flush_dcache_pages(struct request *rq); #else static inline void rq_flush_dcache_pages(struct request *rq) { } #endif extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); blk_qc_t submit_bio_noacct(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); extern void blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); /* Helper to convert REQ_OP_XXX to its string format XXX */ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ } /* * The basic unit of block I/O is a sector. It is used in a number of contexts * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 * bytes. Variables of type sector_t represent an offset or size that is a * multiple of 512 bytes. Hence these two constants. */ #ifndef SECTOR_SHIFT #define SECTOR_SHIFT 9 #endif #ifndef SECTOR_SIZE #define SECTOR_SIZE (1 << SECTOR_SHIFT) #endif /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request * blk_rq_cur_bytes() : bytes left in the current segment * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment * blk_rq_stats_sectors() : sectors of the entire request used for stats */ static inline sector_t blk_rq_pos(const struct request *rq) { return rq->__sector; } static inline unsigned int blk_rq_bytes(const struct request *rq) { return rq->__data_len; } static inline int blk_rq_cur_bytes(const struct request *rq) { return rq->bio ? bio_cur_bytes(rq->bio) : 0; } extern unsigned int blk_rq_err_bytes(const struct request *rq); static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_stats_sectors(const struct request *rq) { return rq->stats_sectors; } #ifdef CONFIG_BLK_DEV_ZONED /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); } static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); } #endif /* CONFIG_BLK_DEV_ZONED */ /* * Some commands like WRITE SAME have a payload or data transfer size which * is different from the size of the request. Any driver that supports such * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to * calculate the data transfer size. */ static inline unsigned int blk_rq_payload_bytes(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec.bv_len; return blk_rq_bytes(rq); } /* * Return the first full biovec in the request. The caller needs to check that * there are any bvecs before calling this helper. */ static inline struct bio_vec req_bvec(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec; return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); if (unlikely(op == REQ_OP_WRITE_SAME)) return q->limits.max_write_same_sectors; if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; return q->limits.max_sectors; } /* * Return maximum size of a request at given offset. Only valid for * file system requests. */ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset, unsigned int chunk_sectors) { if (!chunk_sectors) { if (q->limits.chunk_sectors) chunk_sectors = q->limits.chunk_sectors; else return q->limits.max_sectors; } if (likely(is_power_of_2(chunk_sectors))) chunk_sectors -= offset & (chunk_sectors - 1); else chunk_sectors -= sector_div(offset, chunk_sectors); return min(q->limits.max_sectors, chunk_sectors); } static inline unsigned int blk_rq_get_max_sectors(struct request *rq, sector_t offset) { struct request_queue *q = rq->q; if (blk_rq_is_passthrough(rq)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || req_op(rq) == REQ_OP_DISCARD || req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); return min(blk_max_size_offset(q, offset, 0), blk_queue_get_max_sectors(q, req_op(rq))); } static inline unsigned int blk_rq_count_bios(struct request *rq) { unsigned int nr_bios = 0; struct bio *bio; __rq_for_each_bio(bio, rq) nr_bios++; return nr_bios; } void blk_steal_bios(struct bio_list *list, struct request *rq); /* * Request completion related functions. * * blk_update_request() completes given number of bytes and updates * the request without completing it. */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); extern void blk_abort_request(struct request *); /* * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_discard_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); extern void blk_queue_max_zone_append_sectors(struct request_queue *q, unsigned int max_zone_append_sectors); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); void blk_queue_update_readahead(struct request_queue *q); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern void blk_queue_required_elevator_features(struct request_queue *q, unsigned int features); extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, struct device *dev); /* * Number of physical segments as sent to the device. * * Normally this is the number of discontiguous data segments sent by the * submitter. But for data-less command like discard we might have no * actual data segments submitted, but the driver might have to add it's * own special payload. In that case we still return 1 here so that this * special payload will be mapped. */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return 1; return rq->nr_phys_segments; } /* * Number of discard segments (or ranges) the driver needs to fill in. * Each discard bio merged into a request is counted as one segment. */ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) { return max_t(unsigned short, rq->nr_phys_segments, 1); } int __blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg); static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist) { struct scatterlist *last_sg = NULL; return __blk_rq_map_sg(q, rq, sglist, &last_sg); } extern void blk_dump_rq_flags(struct request *, char *); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); #ifdef CONFIG_BLOCK /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests * into single larger request. As the requests are moved from a per-task list to * the device's request_queue in a batch, this results in improved scalability * as the lock contention for request_queue lock is reduced. * * It is ok not to disable preemption when adding the request to the plug list * or when attempting a merge, because blk_schedule_flush_list() will only flush * the plug list when the task sleeps by itself. For details, please see * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ unsigned short rq_count; bool multiple_queues; bool nowait; }; struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); struct blk_plug_cb { struct list_head list; blk_plug_cb_fn callback; void *data; }; extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, int size); extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) blk_flush_plug_list(plug, false); } static inline void blk_schedule_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; return plug && (!list_empty(&plug->mq_list) || !list_empty(&plug->cb_list)); } int blkdev_issue_flush(struct block_device *, gfp_t); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ struct blk_plug { }; static inline void blk_start_plug(struct blk_plug *plug) { } static inline void blk_finish_plug(struct blk_plug *plug) { } static inline void blk_flush_plug(struct task_struct *task) { } static inline void blk_schedule_flush_plug(struct task_struct *task) { } static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; } static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) { return 0; } static inline long nr_blockdev_pages(void) { return 0; } #endif /* CONFIG_BLOCK */ extern void blk_io_schedule(void); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned flags); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - SECTOR_SHIFT), nr_blocks << (sb->s_blocksize_bits - SECTOR_SHIFT), gfp_mask, flags); } static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask) { return blkdev_issue_zeroout(sb->s_bdev, block << (sb->s_blocksize_bits - SECTOR_SHIFT), nr_blocks << (sb->s_blocksize_bits - SECTOR_SHIFT), gfp_mask, 0); } extern int blk_verify_command(unsigned char *cmd, fmode_t mode); static inline bool bdev_is_partition(struct block_device *bdev) { return bdev->bd_partno; } enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, BLK_DEF_MAX_SECTORS = 2560, BLK_MAX_SEGMENT_SIZE = 65536, BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; static inline unsigned long queue_segment_boundary(const struct request_queue *q) { return q->limits.seg_boundary_mask; } static inline unsigned long queue_virt_boundary(const struct request_queue *q) { return q->limits.virt_boundary_mask; } static inline unsigned int queue_max_sectors(const struct request_queue *q) { return q->limits.max_sectors; } static inline unsigned int queue_max_hw_sectors(const struct request_queue *q) { return q->limits.max_hw_sectors; } static inline unsigned short queue_max_segments(const struct request_queue *q) { return q->limits.max_segments; } static inline unsigned short queue_max_discard_segments(const struct request_queue *q) { return q->limits.max_discard_segments; } static inline unsigned int queue_max_segment_size(const struct request_queue *q) { return q->limits.max_segment_size; } static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) { const struct queue_limits *l = &q->limits; return min(l->max_zone_append_sectors, l->max_sectors); } static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; if (q && q->limits.logical_block_size) retval = q->limits.logical_block_size; return retval; } static inline unsigned int bdev_logical_block_size(struct block_device *bdev) { return queue_logical_block_size(bdev_get_queue(bdev)); } static inline unsigned int queue_physical_block_size(const struct request_queue *q) { return q->limits.physical_block_size; } static inline unsigned int bdev_physical_block_size(struct block_device *bdev) { return queue_physical_block_size(bdev_get_queue(bdev)); } static inline unsigned int queue_io_min(const struct request_queue *q) { return q->limits.io_min; } static inline int bdev_io_min(struct block_device *bdev) { return queue_io_min(bdev_get_queue(bdev)); } static inline unsigned int queue_io_opt(const struct request_queue *q) { return q->limits.io_opt; } static inline int bdev_io_opt(struct block_device *bdev) { return queue_io_opt(bdev_get_queue(bdev)); } static inline int queue_alignment_offset(const struct request_queue *q) { if (q->limits.misaligned) return -1; return q->limits.alignment_offset; } static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) << SECTOR_SHIFT; return (granularity + lim->alignment_offset - alignment) % granularity; } static inline int bdev_alignment_offset(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q->limits.misaligned) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, bdev->bd_part->start_sect); return q->limits.alignment_offset; } static inline int queue_discard_alignment(const struct request_queue *q) { if (q->limits.discard_misaligned) return -1; return q->limits.discard_alignment; } static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { unsigned int alignment, granularity, offset; if (!lim->max_discard_sectors) return 0; /* Why are these in bytes, not sectors? */ alignment = lim->discard_alignment >> SECTOR_SHIFT; granularity = lim->discard_granularity >> SECTOR_SHIFT; if (!granularity) return 0; /* Offset of the partition start in 'granularity' sectors */ offset = sector_div(sector, granularity); /* And why do we do this modulus *again* in blkdev_issue_discard()? */ offset = (granularity + alignment - offset) % granularity; /* Turn it back into bytes, gaah */ return offset << SECTOR_SHIFT; } /* * Two cases of handling DISCARD merge: * If max_discard_segments > 1, the driver takes every bio * as a range and send them to controller together. The ranges * needn't to be contiguous. * Otherwise, the bios/requests will be handled as same as * others which should be contiguous. */ static inline bool blk_discard_mergable(struct request *req) { if (req_op(req) == REQ_OP_DISCARD && queue_max_discard_segments(req->q) > 1) return true; return false; } static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, bdev->bd_part->start_sect); return q->limits.discard_alignment; } static inline unsigned int bdev_write_same(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return q->limits.max_write_same_sectors; return 0; } static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return q->limits.max_write_zeroes_sectors; return 0; } static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return blk_queue_zoned_model(q); return BLK_ZONED_NONE; } static inline bool bdev_is_zoned(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return blk_queue_is_zoned(q); return false; } static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return blk_queue_zone_sectors(q); return 0; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return queue_max_open_zones(q); return 0; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) return queue_max_active_zones(q); return 0; } static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; } static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; return !(addr & alignment) && !(len & alignment); } /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { unsigned int bits = 8; do { bits++; size >>= 1; } while (size > 256); return bits; } static inline unsigned int block_size(struct block_device *bdev) { return 1 << bdev->bd_inode->i_blkbits; } int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") #if defined(CONFIG_BLK_DEV_INTEGRITY) enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, }; struct blk_integrity_iter { void *prot_buf; void *data_buf; sector_t seed; unsigned int data_size; unsigned short interval; const char *disk_name; }; typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); typedef void (integrity_prepare_fn) (struct request *); typedef void (integrity_complete_fn) (struct request *, unsigned int); struct blk_integrity_profile { integrity_processing_fn *generate_fn; integrity_processing_fn *verify_fn; integrity_prepare_fn *prepare_fn; integrity_complete_fn *complete_fn; const char *name; }; extern void blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { struct blk_integrity *bi = &disk->queue->integrity; if (!bi->profile) return NULL; return bi; } static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) { return blk_get_integrity(bdev->bd_disk); } static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { return q->integrity.profile; } static inline bool blk_integrity_rq(struct request *rq) { return rq->cmd_flags & REQ_INTEGRITY; } static inline void blk_queue_max_integrity_segments(struct request_queue *q, unsigned int segs) { q->limits.max_integrity_segments = segs; } static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { return q->limits.max_integrity_segments; } /** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device * @sectors: Size of the bio in 512-byte sectors * * Description: The block layer calculates everything in 512 byte * sectors but integrity metadata is done in terms of the data integrity * interval size of the storage device. Convert the block layer sectors * to the appropriate number of integrity intervals. */ static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { return sectors >> (bi->interval_exp - 9); } static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, unsigned int sectors) { return bio_integrity_intervals(bi, sectors) * bi->tuple_size; } /* * Return the first bvec that contains integrity data. Only drivers that are * limited to a single integrity segment should use this helper. */ static inline struct bio_vec *rq_integrity_vec(struct request *rq) { if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) return NULL; return rq->bio->bi_integrity->bip_vec; } #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; struct block_device; struct gendisk; struct blk_integrity; static inline int blk_integrity_rq(struct request *rq) { return 0; } static inline int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *b) { return 0; } static inline int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *b, struct scatterlist *s) { return 0; } static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { return NULL; } static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { return false; } static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) { return 0; } static inline void blk_integrity_register(struct gendisk *d, struct blk_integrity *b) { } static inline void blk_integrity_unregister(struct gendisk *d) { } static inline void blk_queue_max_integrity_segments(struct request_queue *q, unsigned int segs) { } static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { return 0; } static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { return 0; } static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, unsigned int sectors) { return 0; } static inline struct bio_vec *rq_integrity_vec(struct request *rq) { return NULL; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ #ifdef CONFIG_BLK_INLINE_ENCRYPTION bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); void blk_ksm_unregister(struct request_queue *q); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q) { return true; } static inline void blk_ksm_unregister(struct request_queue *q) { } #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ struct block_device_operations { blk_qc_t (*submit_bio) (struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); char *(*devnode)(struct gendisk *disk, umode_t *mode); struct module *owner; const struct pr_ops *pr_ops; }; #ifdef CONFIG_COMPAT extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); #else #define blkdev_compat_ptr_ioctl NULL #endif extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); #ifdef CONFIG_BLK_DEV_ZONED bool blk_req_needs_zone_write_lock(struct request *rq); bool blk_req_zone_write_trylock(struct request *rq); void __blk_req_zone_write_lock(struct request *rq); void __blk_req_zone_write_unlock(struct request *rq); static inline void blk_req_zone_write_lock(struct request *rq) { if (blk_req_needs_zone_write_lock(rq)) __blk_req_zone_write_lock(rq); } static inline void blk_req_zone_write_unlock(struct request *rq) { if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) __blk_req_zone_write_unlock(rq); } static inline bool blk_req_zone_is_write_locked(struct request *rq) { return rq->q->seq_zones_wlock && test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) { if (!blk_req_needs_zone_write_lock(rq)) return true; return !blk_req_zone_is_write_locked(rq); } #else static inline bool blk_req_needs_zone_write_lock(struct request *rq) { return false; } static inline void blk_req_zone_write_lock(struct request *rq) { } static inline void blk_req_zone_write_unlock(struct request *rq) { } static inline bool blk_req_zone_is_write_locked(struct request *rq) { return false; } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) { return true; } #endif /* CONFIG_BLK_DEV_ZONED */ static inline void blk_wake_io_task(struct task_struct *waiter) { /* * If we're polling, the task itself is doing the completions. For * that case, we don't need to signal a wakeup, it's enough to just * mark us as RUNNING. */ if (waiter == current) __set_current_state(TASK_RUNNING); else wake_up_process(waiter); } unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, struct bio *bio); void part_end_io_acct(struct hd_struct *part, struct bio *bio, unsigned long start_time); /** * bio_start_io_acct - start I/O accounting for bio based drivers * @bio: bio to start account for * * Returns the start time that should be passed back to bio_end_io_acct(). */ static inline unsigned long bio_start_io_acct(struct bio *bio) { return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio)); } /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for * @start: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); } int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); struct block_device *lookup_bdev(const char *); void blkdev_show(struct seq_file *seqf, off_t offset); #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_MAX 512 #else #define BLKDEV_MAJOR_MAX 0 #endif struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, void *holder); void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); struct block_device *I_BDEV(struct inode *inode); struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend); int sync_blockdev(struct block_device *bdev); #else static inline void invalidate_bdev(struct block_device *bdev) { } static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend) { return 0; } static inline int sync_blockdev(struct block_device *bdev) { return 0; } #endif int fsync_bdev(struct block_device *bdev); struct super_block *freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev, struct super_block *sb); #endif /* _LINUX_BLKDEV_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> */ /* Internal header file for autofs */ #include <linux/auto_fs.h> #include <linux/auto_dev-ioctl.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/string.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/completion.h> #include <linux/file.h> #include <linux/magic.h> /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY #define AUTOFS_IOC_COUNT 32 #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) #define AUTOFS_DEV_IOCTL_IOC_COUNT \ (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD) #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__ extern struct file_system_type autofs_fs_type; /* * Unified info structure. This is pointed to by both the dentry and * inode structures. Each file in the filesystem has an instance of this * structure. It holds a reference to the dentry, so dentries are never * flushed while the file exists. All name lookups are dealt with at the * dentry level, although the filesystem can interfere in the validation * process. Readdir is implemented by traversing the dentry lists. */ struct autofs_info { struct dentry *dentry; struct inode *inode; int flags; struct completion expire_complete; struct list_head active; struct list_head expiring; struct autofs_sb_info *sbi; unsigned long last_used; int count; kuid_t uid; kgid_t gid; struct rcu_head rcu; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ #define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is * not permitted. If it progresses to * actual expiry attempt, the flag is * not cleared when EXPIRING is set - * in that case it gets cleared only * when it comes to clearing EXPIRING. */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; autofs_wqt_t wait_queue_token; /* We use the following to see what we are waiting for */ struct qstr name; u32 dev; u64 ino; kuid_t uid; kgid_t gid; pid_t pid; pid_t tgid; /* This is for status reporting upon return */ int status; unsigned int wait_ctr; }; #define AUTOFS_SBI_MAGIC 0x6d4a556d #define AUTOFS_SBI_CATATONIC 0x0001 #define AUTOFS_SBI_STRICTEXPIRE 0x0002 #define AUTOFS_SBI_IGNORE 0x0004 struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; struct pid *oz_pgrp; int version; int sub_version; int min_proto; int max_proto; unsigned int flags; unsigned long exp_timeout; unsigned int type; struct super_block *sb; struct mutex wq_mutex; struct mutex pipe_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ spinlock_t lookup_lock; struct list_head active_list; struct list_head expiring_list; struct rcu_head rcu; }; static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) { return (struct autofs_sb_info *)(sb->s_fs_info); } static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) { return (struct autofs_info *)(dentry->d_fsdata); } /* autofs_oz_mode(): do we see the man behind the curtain? (The * processes which do manipulations for us in user space sees the raw * filesystem without "magic".) */ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { return ((sbi->flags & AUTOFS_SBI_CATATONIC) || task_pgrp(current) == sbi->oz_pgrp); } struct inode *autofs_get_inode(struct super_block *, umode_t); void autofs_free_ino(struct autofs_info *); /* Expiration */ int is_autofs_dentry(struct dentry *); int autofs_expire_wait(const struct path *path, int rcu_walk); int autofs_expire_run(struct super_block *, struct vfsmount *, struct autofs_sb_info *, struct autofs_packet_expire __user *); int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, unsigned int how); int autofs_expire_multi(struct super_block *, struct vfsmount *, struct autofs_sb_info *, int __user *); /* Device node initialization */ int autofs_dev_ioctl_init(void); void autofs_dev_ioctl_exit(void); /* Operations structures */ extern const struct inode_operations autofs_symlink_inode_operations; extern const struct inode_operations autofs_dir_inode_operations; extern const struct file_operations autofs_dir_operations; extern const struct file_operations autofs_root_operations; extern const struct dentry_operations autofs_dentry_operations; /* VFS automount flags management functions */ static inline void __managed_dentry_set_managed(struct dentry *dentry) { dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); } static inline void managed_dentry_set_managed(struct dentry *dentry) { spin_lock(&dentry->d_lock); __managed_dentry_set_managed(dentry); spin_unlock(&dentry->d_lock); } static inline void __managed_dentry_clear_managed(struct dentry *dentry) { dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); } static inline void managed_dentry_clear_managed(struct dentry *dentry) { spin_lock(&dentry->d_lock); __managed_dentry_clear_managed(dentry); spin_unlock(&dentry->d_lock); } /* Initializing function */ int autofs_fill_super(struct super_block *, void *, int); struct autofs_info *autofs_new_ino(struct autofs_sb_info *); void autofs_clean_ino(struct autofs_info *); static inline int autofs_prepare_pipe(struct file *pipe) { if (!(pipe->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; /* We want a packet pipe */ pipe->f_flags |= O_DIRECT; /* We don't expect -EAGAIN */ pipe->f_flags &= ~O_NONBLOCK; return 0; } /* Queue management functions */ int autofs_wait(struct autofs_sb_info *, const struct path *, enum autofs_notify); int autofs_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); void autofs_catatonic_mode(struct autofs_sb_info *); static inline u32 autofs_get_dev(struct autofs_sb_info *sbi) { return new_encode_dev(sbi->sb->s_dev); } static inline u64 autofs_get_ino(struct autofs_sb_info *sbi) { return d_inode(sbi->sb->s_root)->i_ino; } static inline void __autofs_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); } } static inline void autofs_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); spin_unlock(&sbi->lookup_lock); } } static inline void autofs_del_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); if (!list_empty(&ino->expiring)) list_del_init(&ino->expiring); spin_unlock(&sbi->lookup_lock); } } void autofs_kill_sb(struct super_block *);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC64_64_H #define _ASM_X86_ATOMIC64_64_H #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> /* The 64-bit atomic type */ #define ATOMIC64_INIT(i) { (i) } /** * arch_atomic64_read - read atomic64 variable * @v: pointer of type atomic64_t * * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ static inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } /** * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: required value * * Atomically sets the value of @v to @i. */ static inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } /** * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } /** * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ static inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } /** * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test /** * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc /** * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec /** * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test /** * arch_atomic64_inc_and_test - increment and test * @v: pointer to type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test /** * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative /** * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + @v */ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic64_add_return arch_atomic64_add_return static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } #define arch_atomic64_sub_return arch_atomic64_sub_return static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { return try_cmpxchg(&v->counter, old, new); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } #define arch_atomic64_xchg arch_atomic64_xchg static inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic64_fetch_and arch_atomic64_fetch_and static inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic64_fetch_or arch_atomic64_fetch_or static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif /* _ASM_X86_ATOMIC64_64_H */
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/cpu.h - generic cpu definition * * This is mainly for topological representation. We define the * basic 'struct cpu' here, which can be embedded in per-arch * definitions of processors. * * Basic handling of the devices is done in drivers/base/cpu.c * * CPUs are exported via sysfs in the devices/system/cpu * directory. */ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ #include <linux/node.h> #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/cpuhotplug.h> struct device; struct device_node; struct attribute_group; struct cpu { int node_id; /* The node which contains the CPU */ int hotpluggable; /* creates sysfs control file if hotpluggable */ struct device dev; }; extern void boot_cpu_init(void); extern void boot_cpu_hotplug_init(void); extern void cpu_init(void); extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); extern bool cpu_is_hotpluggable(unsigned cpu); extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id); extern bool arch_find_n_match_cpu_physical_id(struct device_node *cpun, int cpu, unsigned int *thread); extern int cpu_add_dev_attr(struct device_attribute *attr); extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); extern ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); extern ssize_t arch_cpu_release(const char *, size_t); #endif /* * These states are not related to the core CPU hotplug mechanism. They are * used by various (sub)architectures to track internal state */ #define CPU_ONLINE 0x0002 /* CPU is up */ #define CPU_UP_PREPARE 0x0003 /* CPU coming up */ #define CPU_DEAD 0x0007 /* CPU dead */ #define CPU_DEAD_FROZEN 0x0008 /* CPU timed out on unplug */ #define CPU_POST_DEAD 0x0009 /* CPU successfully unplugged */ #define CPU_BROKEN 0x000B /* CPU did not die properly */ #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; int add_cpu(unsigned int cpu); int cpu_device_up(struct device *dev); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int setup_max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 static inline void cpu_maps_update_begin(void) { } static inline void cpu_maps_update_done(void) { } #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; #ifdef CONFIG_HOTPLUG_CPU extern void cpus_write_lock(void); extern void cpus_write_unlock(void); extern void cpus_read_lock(void); extern void cpus_read_unlock(void); extern int cpus_read_trylock(void); extern void lockdep_assert_cpus_held(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int remove_cpu(unsigned int cpu); int cpu_device_down(struct device *dev); extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ static inline void cpus_write_lock(void) { } static inline void cpus_write_unlock(void) { } static inline void cpus_read_lock(void) { } static inline void cpus_read_unlock(void) { } static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ /* Wrappers which go away once all code is converted */ static inline void cpu_hotplug_begin(void) { cpus_write_lock(); } static inline void cpu_hotplug_done(void) { cpus_write_unlock(); } static inline void get_online_cpus(void) { cpus_read_lock(); } static inline void put_online_cpus(void) { cpus_read_unlock(); } #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); static inline int suspend_disable_secondary_cpus(void) { int cpu = 0; if (IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) cpu = -1; return freeze_secondary_cpus(cpu); } static inline void suspend_enable_secondary_cpus(void) { return thaw_secondary_cpus(); } #else /* !CONFIG_PM_SLEEP_SMP */ static inline void thaw_secondary_cpus(void) {} static inline int suspend_disable_secondary_cpus(void) { return 0; } static inline void suspend_enable_secondary_cpus(void) { } #endif /* !CONFIG_PM_SLEEP_SMP */ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); /* Attach to any functions which should be considered cpuidle. */ #define __cpuidle __section(".cpuidle.text") bool cpu_in_idle(unsigned long pc); void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); void play_idle_precise(u64 duration_ns, u64 latency_ns); static inline void play_idle(unsigned long duration_us) { play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX); } #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); bool cpu_report_death(void); void cpuhp_report_idle_dead(void); #else static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ enum cpuhp_smt_control { CPU_SMT_ENABLED, CPU_SMT_DISABLED, CPU_SMT_FORCE_DISABLED, CPU_SMT_NOT_SUPPORTED, CPU_SMT_NOT_IMPLEMENTED, }; #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); extern bool cpu_smt_possible(void); extern int cpuhp_smt_enable(void); extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); #else # define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } static inline bool cpu_smt_possible(void) { return false; } static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } #endif extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); #endif /* _LINUX_CPU_H_ */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 // SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/group.c - Operations for adding/removing multiple files at once. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * Copyright (c) 2013 Greg Kroah-Hartman * Copyright (c) 2013 The Linux Foundation */ #include <linux/kobject.h> #include <linux/module.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/err.h> #include <linux/fs.h> #include "sysfs.h" static void remove_files(struct kernfs_node *parent, const struct attribute_group *grp) { struct attribute *const *attr; struct bin_attribute *const *bin_attr; if (grp->attrs) for (attr = grp->attrs; *attr; attr++) kernfs_remove_by_name(parent, (*attr)->name); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); } static int create_files(struct kernfs_node *parent, struct kobject *kobj, kuid_t uid, kgid_t gid, const struct attribute_group *grp, int update) { struct attribute *const *attr; struct bin_attribute *const *bin_attr; int error = 0, i; if (grp->attrs) { for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { umode_t mode = (*attr)->mode; /* * In update mode, we're changing the permissions or * visibility. Do this by first removing then * re-adding (if required) the file. */ if (update) kernfs_remove_by_name(parent, (*attr)->name); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) continue; } WARN(mode & ~(SYSFS_PREALLOC | 0664), "Attribute %s: Invalid permissions 0%o\n", (*attr)->name, mode); mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, mode, uid, gid, NULL); if (unlikely(error)) break; } if (error) { remove_files(parent, grp); goto exit; } } if (grp->bin_attrs) { for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { umode_t mode = (*bin_attr)->attr.mode; if (update) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); if (grp->is_bin_visible) { mode = grp->is_bin_visible(kobj, *bin_attr, i); if (!mode) continue; } WARN(mode & ~(SYSFS_PREALLOC | 0664), "Attribute %s: Invalid permissions 0%o\n", (*bin_attr)->attr.name, mode); mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, mode, uid, gid, NULL); if (error) break; } if (error) remove_files(parent, grp); } exit: return error; } static int internal_create_group(struct kobject *kobj, int update, const struct attribute_group *grp) { struct kernfs_node *kn; kuid_t uid; kgid_t gid; int error; if (WARN_ON(!kobj || (!update && !kobj->sd))) return -EINVAL; /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) return -EINVAL; if (!grp->attrs && !grp->bin_attrs) { WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", kobj->name, grp->name ?: ""); return -EINVAL; } kobject_get_ownership(kobj, &uid, &gid); if (grp->name) { if (update) { kn = kernfs_find_and_get(kobj->sd, grp->name); if (!kn) { pr_warn("Can't update unknown attr grp name: %s/%s\n", kobj->name, grp->name); return -EINVAL; } } else { kn = kernfs_create_dir_ns(kobj->sd, grp->name, S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, kobj, NULL); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(kobj->sd, grp->name); return PTR_ERR(kn); } } } else kn = kobj->sd; kernfs_get(kn); error = create_files(kn, kobj, uid, gid, grp, update); if (error) { if (grp->name) kernfs_remove(kn); } kernfs_put(kn); if (grp->name && update) kernfs_put(kn); return error; } /** * sysfs_create_group - given a directory kobject, create an attribute group * @kobj: The kobject to create the group on * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. */ int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { return internal_create_group(kobj, 0, grp); } EXPORT_SYMBOL_GPL(sysfs_create_group); static int internal_create_groups(struct kobject *kobj, int update, const struct attribute_group **groups) { int error = 0; int i; if (!groups) return 0; for (i = 0; groups[i]; i++) { error = internal_create_group(kobj, update, groups[i]); if (error) { while (--i >= 0) sysfs_remove_group(kobj, groups[i]); break; } } return error; } /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on * @groups: The attribute groups to create, NULL terminated * * This function creates a bunch of attribute groups. If an error occurs when * creating a group, all previously created groups will be removed, unwinding * everything back to the original state when this function was called. * It will explicitly warn and error if any of the attribute files being * created already exist. * * Returns 0 on success or error code from sysfs_create_group on failure. */ int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); /** * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to update the group on * @groups: The attribute groups to update, NULL terminated * * This function update a bunch of attribute groups. If an error occurs when * updating a group, all previously updated groups will be removed together * with already existing (not updated) attributes. * * Returns 0 on success or error code from sysfs_update_group on failure. */ int sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups) { return internal_create_groups(kobj, 1, groups); } EXPORT_SYMBOL_GPL(sysfs_update_groups); /** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on * @grp: The attribute group to update * * This function updates an attribute group. Unlike * sysfs_create_group(), it will explicitly not warn or error if any * of the attribute files being created already exist. Furthermore, * if the visibility of the files has changed through the is_visible() * callback, it will update the permissions and add or remove the * relevant files. Changing a group's name (subdirectory name under * kobj's directory in sysfs) is not allowed. * * The primary use for this function is to call it after making a change * that affects group visibility. * * Returns 0 on success or error code on failure. */ int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) { return internal_create_group(kobj, 1, grp); } EXPORT_SYMBOL_GPL(sysfs_update_group); /** * sysfs_remove_group: remove a group from a kobject * @kobj: kobject to remove the group from * @grp: group to remove * * This function removes a group of attributes from a kobject. The attributes * previously have to have been created for this group, otherwise it will fail. */ void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent = kobj->sd; struct kernfs_node *kn; if (grp->name) { kn = kernfs_find_and_get(parent, grp->name); if (!kn) { WARN(!kn, KERN_WARNING "sysfs group '%s' not found for kobject '%s'\n", grp->name, kobject_name(kobj)); return; } } else { kn = parent; kernfs_get(kn); } remove_files(kn, grp); if (grp->name) kernfs_remove(kn); kernfs_put(kn); } EXPORT_SYMBOL_GPL(sysfs_remove_group); /** * sysfs_remove_groups - remove a list of groups * * @kobj: The kobject for the groups to be removed from * @groups: NULL terminated list of groups to be removed * * If groups is not NULL, remove the specified groups from the kobject. */ void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups) { int i; if (!groups) return; for (i = 0; groups[i]; i++) sysfs_remove_group(kobj, groups[i]); } EXPORT_SYMBOL_GPL(sysfs_remove_groups); /** * sysfs_merge_group - merge files into a pre-existing attribute group. * @kobj: The kobject containing the group. * @grp: The files to create and the attribute group they belong to. * * This function returns an error if the group doesn't exist or any of the * files already exist in that group, in which case none of the new files * are created. */ int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; kuid_t uid; kgid_t gid; int error = 0; struct attribute *const *attr; int i; parent = kernfs_find_and_get(kobj->sd, grp->name); if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) error = sysfs_add_file_mode_ns(parent, *attr, false, (*attr)->mode, uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); } kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_merge_group); /** * sysfs_unmerge_group - remove files from a pre-existing attribute group. * @kobj: The kobject containing the group. * @grp: The files to remove and the attribute group they belong to. */ void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; struct attribute *const *attr; parent = kernfs_find_and_get(kobj->sd, grp->name); if (parent) { for (attr = grp->attrs; *attr; ++attr) kernfs_remove_by_name(parent, (*attr)->name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_unmerge_group); /** * sysfs_add_link_to_group - add a symlink to an attribute group. * @kobj: The kobject containing the group. * @group_name: The name of the group. * @target: The target kobject of the symlink to create. * @link_name: The name of the symlink to create. */ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name) { struct kernfs_node *parent; int error = 0; parent = kernfs_find_and_get(kobj->sd, group_name); if (!parent) return -ENOENT; error = sysfs_create_link_sd(parent, target, link_name); kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_add_link_to_group); /** * sysfs_remove_link_from_group - remove a symlink from an attribute group. * @kobj: The kobject containing the group. * @group_name: The name of the group. * @link_name: The name of the symlink to remove. */ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name) { struct kernfs_node *parent; parent = kernfs_find_and_get(kobj->sd, group_name); if (parent) { kernfs_remove_by_name(parent, link_name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); /** * compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing * to a group or an attribute * @kobj: The kobject containing the group. * @target_kobj: The target kobject. * @target_name: The name of the target group or attribute. * @symlink_name: The name of the symlink file (target_name will be * considered if symlink_name is NULL). */ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, const char *symlink_name) { struct kernfs_node *target; struct kernfs_node *entry; struct kernfs_node *link; /* * We don't own @target_kobj and it may be removed at any time. * Synchronize using sysfs_symlink_target_lock. See sysfs_remove_dir() * for details. */ spin_lock(&sysfs_symlink_target_lock); target = target_kobj->sd; if (target) kernfs_get(target); spin_unlock(&sysfs_symlink_target_lock); if (!target) return -ENOENT; entry = kernfs_find_and_get(target_kobj->sd, target_name); if (!entry) { kernfs_put(target); return -ENOENT; } if (!symlink_name) symlink_name = target_name; link = kernfs_create_link(kobj->sd, symlink_name, entry); if (PTR_ERR(link) == -EEXIST) sysfs_warn_dup(kobj->sd, symlink_name); kernfs_put(entry); kernfs_put(target); return PTR_ERR_OR_ZERO(link); } EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, const struct attribute_group *grp, struct iattr *newattrs) { struct kernfs_node *kn; int error; if (grp->attrs) { struct attribute *const *attr; for (attr = grp->attrs; *attr; attr++) { kn = kernfs_find_and_get(grp_kn, (*attr)->name); if (!kn) return -ENOENT; error = kernfs_setattr(kn, newattrs); kernfs_put(kn); if (error) return error; } } if (grp->bin_attrs) { struct bin_attribute *const *bin_attr; for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); if (!kn) return -ENOENT; error = kernfs_setattr(kn, newattrs); kernfs_put(kn); if (error) return error; } } return 0; } /** * sysfs_group_change_owner - change owner of an attribute group. * @kobj: The kobject containing the group. * @grp: The attribute group. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Returns 0 on success or error code on failure. */ int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *grp, kuid_t kuid, kgid_t kgid) { struct kernfs_node *grp_kn; int error; struct iattr newattrs = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = kuid, .ia_gid = kgid, }; if (!kobj->state_in_sysfs) return -EINVAL; if (grp->name) { grp_kn = kernfs_find_and_get(kobj->sd, grp->name); } else { kernfs_get(kobj->sd); grp_kn = kobj->sd; } if (!grp_kn) return -ENOENT; error = kernfs_setattr(grp_kn, &newattrs); if (!error) error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs); kernfs_put(grp_kn); return error; } EXPORT_SYMBOL_GPL(sysfs_group_change_owner); /** * sysfs_groups_change_owner - change owner of a set of attribute groups. * @kobj: The kobject containing the groups. * @groups: The attribute groups. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Returns 0 on success or error code on failure. */ int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) { int error = 0, i; if (!kobj->state_in_sysfs) return -EINVAL; if (!groups) return 0; for (i = 0; groups[i]; i++) { error = sysfs_group_change_owner(kobj, groups[i], kuid, kgid); if (error) break; } return error; } EXPORT_SYMBOL_GPL(sysfs_groups_change_owner);
1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 // SPDX-License-Identifier: GPL-2.0 /* * linux/lib/kasprintf.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <stdarg.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> /* Simplified asprintf. */ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) { unsigned int first, second; char *p; va_list aq; va_copy(aq, ap); first = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = kmalloc_track_caller(first+1, gfp); if (!p) return NULL; second = vsnprintf(p, first+1, fmt, ap); WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)", first, second, fmt); return p; } EXPORT_SYMBOL(kvasprintf); /* * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt * (or the sole vararg) points to rodata, we will then save a memory * allocation and string copy. In any case, the return value should be * freed using kfree_const(). */ const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap) { if (!strchr(fmt, '%')) return kstrdup_const(fmt, gfp); if (!strcmp(fmt, "%s")) return kstrdup_const(va_arg(ap, const char*), gfp); return kvasprintf(gfp, fmt, ap); } EXPORT_SYMBOL(kvasprintf_const); char *kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = kvasprintf(gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL(kasprintf);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 #ifndef _LINUX_SCHED_ISOLATION_H #define _LINUX_SCHED_ISOLATION_H #include <linux/cpumask.h> #include <linux/init.h> #include <linux/tick.h> enum hk_flags { HK_FLAG_TIMER = 1, HK_FLAG_RCU = (1 << 1), HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), HK_FLAG_DOMAIN = (1 << 5), HK_FLAG_WQ = (1 << 6), HK_FLAG_MANAGED_IRQ = (1 << 7), HK_FLAG_KTHREAD = (1 << 8), }; #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); extern int housekeeping_any_cpu(enum hk_flags flags); extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); extern bool housekeeping_enabled(enum hk_flags flags); extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); #else static inline int housekeeping_any_cpu(enum hk_flags flags) { return smp_processor_id(); } static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { return cpu_possible_mask; } static inline bool housekeeping_enabled(enum hk_flags flags) { return false; } static inline void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overridden)) return housekeeping_test_cpu(cpu, flags); #endif return true; } #endif /* _LINUX_SCHED_ISOLATION_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip_tunnels.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); skb_dst_drop(skb); dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 // SPDX-License-Identifier: GPL-2.0-or-later /* * Synchronous Cryptographic Hash operations. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/scatterwalk.h> #include <crypto/internal/hash.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> #include <net/netlink.h> #include <linux/compiler.h> #include "internal.h" static const struct crypto_type crypto_shash_type; static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { return -ENOSYS; } /* * Check whether an shash algorithm has a setkey function. * * For CFI compatibility, this must not be an inline function. This is because * when CFI is enabled, modules won't get the same address for shash_no_setkey * (if it were exported, which inlining would require) as the core kernel will. */ bool crypto_shash_alg_has_setkey(struct shash_alg *alg) { return alg->setkey != shash_no_setkey; } EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); unsigned long absize; u8 *buffer, *alignbuffer; int err; absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); err = shash->setkey(tfm, alignbuffer, keylen); kfree_sensitive(buffer); return err; } static void shash_set_needkey(struct crypto_shash *tfm, struct shash_alg *alg) { if (crypto_shash_alg_needs_key(alg)) crypto_shash_set_flags(tfm, CRYPTO_TFM_NEED_KEY); } int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); int err; if ((unsigned long)key & alignmask) err = shash_setkey_unaligned(tfm, key, keylen); else err = shash->setkey(tfm, key, keylen); if (unlikely(err)) { shash_set_needkey(tfm, shash); return err; } crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; } EXPORT_SYMBOL_GPL(crypto_shash_setkey); static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned int len) { struct crypto_shash *tfm = desc->tfm; struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); unsigned int unaligned_len = alignmask + 1 - ((unsigned long)data & alignmask); /* * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf))) return -EINVAL; if (unaligned_len > len) unaligned_len = len; memcpy(buf, data, unaligned_len); err = shash->update(desc, buf, unaligned_len); memset(buf, 0, unaligned_len); return err ?: shash->update(desc, data + unaligned_len, len - unaligned_len); } int crypto_shash_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct crypto_shash *tfm = desc->tfm; struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); if ((unsigned long)data & alignmask) return shash_update_unaligned(desc, data, len); return shash->update(desc, data, len); } EXPORT_SYMBOL_GPL(crypto_shash_update); static int shash_final_unaligned(struct shash_desc *desc, u8 *out) { struct crypto_shash *tfm = desc->tfm; unsigned long alignmask = crypto_shash_alignmask(tfm); struct shash_alg *shash = crypto_shash_alg(tfm); unsigned int ds = crypto_shash_digestsize(tfm); /* * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; if (WARN_ON(buf + ds > ubuf + sizeof(ubuf))) return -EINVAL; err = shash->final(desc, buf); if (err) goto out; memcpy(out, buf, ds); out: memset(buf, 0, ds); return err; } int crypto_shash_final(struct shash_desc *desc, u8 *out) { struct crypto_shash *tfm = desc->tfm; struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); if ((unsigned long)out & alignmask) return shash_final_unaligned(desc, out); return shash->final(desc, out); } EXPORT_SYMBOL_GPL(crypto_shash_final); static int shash_finup_unaligned(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return crypto_shash_update(desc, data, len) ?: crypto_shash_final(desc, out); } int crypto_shash_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct crypto_shash *tfm = desc->tfm; struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); if (((unsigned long)data | (unsigned long)out) & alignmask) return shash_finup_unaligned(desc, data, len, out); return shash->finup(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_finup); static int shash_digest_unaligned(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return crypto_shash_init(desc) ?: crypto_shash_finup(desc, data, len, out); } int crypto_shash_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct crypto_shash *tfm = desc->tfm; struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; if (((unsigned long)data | (unsigned long)out) & alignmask) return shash_digest_unaligned(desc, data, len, out); return shash->digest(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_digest); int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, unsigned int len, u8 *out) { SHASH_DESC_ON_STACK(desc, tfm); int err; desc->tfm = tfm; err = crypto_shash_digest(desc, data, len, out); shash_desc_zero(desc); return err; } EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest); static int shash_default_export(struct shash_desc *desc, void *out) { memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(desc->tfm)); return 0; } static int shash_default_import(struct shash_desc *desc, const void *in) { memcpy(shash_desc_ctx(desc), in, crypto_shash_descsize(desc->tfm)); return 0; } static int shash_async_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { struct crypto_shash **ctx = crypto_ahash_ctx(tfm); return crypto_shash_setkey(*ctx, key, keylen); } static int shash_async_init(struct ahash_request *req) { struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); struct shash_desc *desc = ahash_request_ctx(req); desc->tfm = *ctx; return crypto_shash_init(desc); } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc) { struct crypto_hash_walk walk; int nbytes; for (nbytes = crypto_hash_walk_first(req, &walk); nbytes > 0; nbytes = crypto_hash_walk_done(&walk, nbytes)) nbytes = crypto_shash_update(desc, walk.data, nbytes); return nbytes; } EXPORT_SYMBOL_GPL(shash_ahash_update); static int shash_async_update(struct ahash_request *req) { return shash_ahash_update(req, ahash_request_ctx(req)); } static int shash_async_final(struct ahash_request *req) { return crypto_shash_final(ahash_request_ctx(req), req->result); } int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc) { struct crypto_hash_walk walk; int nbytes; nbytes = crypto_hash_walk_first(req, &walk); if (!nbytes) return crypto_shash_final(desc, req->result); do { nbytes = crypto_hash_walk_last(&walk) ? crypto_shash_finup(desc, walk.data, nbytes, req->result) : crypto_shash_update(desc, walk.data, nbytes); nbytes = crypto_hash_walk_done(&walk, nbytes); } while (nbytes > 0); return nbytes; } EXPORT_SYMBOL_GPL(shash_ahash_finup); static int shash_async_finup(struct ahash_request *req) { struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); struct shash_desc *desc = ahash_request_ctx(req); desc->tfm = *ctx; return shash_ahash_finup(req, desc); } int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { unsigned int nbytes = req->nbytes; struct scatterlist *sg; unsigned int offset; int err; if (nbytes && (sg = req->src, offset = sg->offset, nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; data = kmap_atomic(sg_page(sg)); err = crypto_shash_digest(desc, data + offset, nbytes, req->result); kunmap_atomic(data); } else err = crypto_shash_init(desc) ?: shash_ahash_finup(req, desc); return err; } EXPORT_SYMBOL_GPL(shash_ahash_digest); static int shash_async_digest(struct ahash_request *req) { struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); struct shash_desc *desc = ahash_request_ctx(req); desc->tfm = *ctx; return shash_ahash_digest(req, desc); } static int shash_async_export(struct ahash_request *req, void *out) { return crypto_shash_export(ahash_request_ctx(req), out); } static int shash_async_import(struct ahash_request *req, const void *in) { struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); struct shash_desc *desc = ahash_request_ctx(req); desc->tfm = *ctx; return crypto_shash_import(desc, in); } static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_shash **ctx = crypto_tfm_ctx(tfm); crypto_free_shash(*ctx); } int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; struct shash_alg *alg = __crypto_shash_alg(calg); struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; if (!crypto_mod_get(calg)) return -EAGAIN; shash = crypto_create_tfm(calg, &crypto_shash_type); if (IS_ERR(shash)) { crypto_mod_put(calg); return PTR_ERR(shash); } *ctx = shash; tfm->exit = crypto_exit_shash_ops_async; crt->init = shash_async_init; crt->update = shash_async_update; crt->final = shash_async_final; crt->finup = shash_async_finup; crt->digest = shash_async_digest; if (crypto_shash_alg_has_setkey(alg)) crt->setkey = shash_async_setkey; crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) & CRYPTO_TFM_NEED_KEY); crt->export = shash_async_export; crt->import = shash_async_import; crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash); return 0; } static void crypto_shash_exit_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); struct shash_alg *alg = crypto_shash_alg(hash); alg->exit_tfm(hash); } static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); struct shash_alg *alg = crypto_shash_alg(hash); int err; hash->descsize = alg->descsize; shash_set_needkey(hash, alg); if (alg->exit_tfm) tfm->exit = crypto_shash_exit_tfm; if (!alg->init_tfm) return 0; err = alg->init_tfm(hash); if (err) return err; /* ->init_tfm() may have increased the descsize. */ if (WARN_ON_ONCE(hash->descsize > HASH_MAX_DESCSIZE)) { if (alg->exit_tfm) alg->exit_tfm(hash); return -EINVAL; } return 0; } static void crypto_shash_free_instance(struct crypto_instance *inst) { struct shash_instance *shash = shash_instance(inst); shash->free(shash); } #ifdef CONFIG_NET static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_hash rhash; struct shash_alg *salg = __crypto_shash_alg(alg); memset(&rhash, 0, sizeof(rhash)); strscpy(rhash.type, "shash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = salg->digestsize; return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash); } #else static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) { return -ENOSYS; } #endif static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) { struct shash_alg *salg = __crypto_shash_alg(alg); seq_printf(m, "type : shash\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "digestsize : %u\n", salg->digestsize); } static const struct crypto_type crypto_shash_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_shash_init_tfm, .free = crypto_shash_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_shash_show, #endif .report = crypto_shash_report, .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SHASH, .tfmsize = offsetof(struct crypto_shash, base), }; int crypto_grab_shash(struct crypto_shash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_shash_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_shash); struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_shash_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_shash); static int shash_prepare_alg(struct shash_alg *alg) { struct crypto_alg *base = &alg->base; if (alg->digestsize > HASH_MAX_DIGESTSIZE || alg->descsize > HASH_MAX_DESCSIZE || alg->statesize > HASH_MAX_STATESIZE) return -EINVAL; if ((alg->export && !alg->import) || (alg->import && !alg->export)) return -EINVAL; base->cra_type = &crypto_shash_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_SHASH; if (!alg->finup) alg->finup = shash_finup_unaligned; if (!alg->digest) alg->digest = shash_digest_unaligned; if (!alg->export) { alg->export = shash_default_export; alg->import = shash_default_import; alg->statesize = alg->descsize; } if (!alg->setkey) alg->setkey = shash_no_setkey; return 0; } int crypto_register_shash(struct shash_alg *alg) { struct crypto_alg *base = &alg->base; int err; err = shash_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_shash); void crypto_unregister_shash(struct shash_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_shash); int crypto_register_shashes(struct shash_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_shash(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_shash(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_shashes); void crypto_unregister_shashes(struct shash_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_shash(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_shashes); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = shash_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, shash_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(shash_register_instance); void shash_free_singlespawn_instance(struct shash_instance *inst) { crypto_drop_spawn(shash_instance_ctx(inst)); kfree(inst); } EXPORT_SYMBOL_GPL(shash_free_singlespawn_instance); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous cryptographic hash type");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_TASK_STACK_H #define _LINUX_SCHED_TASK_STACK_H /* * task->stack (kernel stack) handling interfaces: */ #include <linux/sched.h> #include <linux/magic.h> #ifdef CONFIG_THREAD_INFO_IN_TASK /* * When accessing the stack of a non-current task that might exit, use * try_get_task_stack() instead. task_stack_page will return a pointer * that could get freed out from under you. */ static inline void *task_stack_page(const struct task_struct *task) { return task->stack; } #define setup_thread_stack(new,old) do { } while(0) static inline unsigned long *end_of_stack(const struct task_struct *task) { #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; #else return task->stack; #endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; } /* * Return the address of the last usable long on the stack. * * When the stack grows down, this is just above the thread * info struct. Going any lower will corrupt the threadinfo. * * When the stack grows up, this is the highest address. * Beyond that position, we corrupt data on the next page. */ static inline unsigned long *end_of_stack(struct task_struct *p) { #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; #else return (unsigned long *)(task_thread_info(p) + 1); #endif } #endif #ifdef CONFIG_THREAD_INFO_IN_TASK static inline void *try_get_task_stack(struct task_struct *tsk) { return refcount_inc_not_zero(&tsk->stack_refcount) ? task_stack_page(tsk) : NULL; } extern void put_task_stack(struct task_struct *tsk); #else static inline void *try_get_task_stack(struct task_struct *tsk) { return task_stack_page(tsk); } static inline void put_task_stack(struct task_struct *tsk) {} #endif #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) static inline int object_is_on_stack(const void *obj) { void *stack = task_stack_page(current); return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) { unsigned long *n = end_of_stack(p); do { /* Skip over canary */ # ifdef CONFIG_STACK_GROWSUP n--; # else n++; # endif } while (!*n); # ifdef CONFIG_STACK_GROWSUP return (unsigned long)end_of_stack(p) - (unsigned long)n; # else return (unsigned long)n - (unsigned long)end_of_stack(p); # endif } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); #ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { /* Reliable end of stack detection: * Some APM bios versions misalign the stack */ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); } #endif #endif /* _LINUX_SCHED_TASK_STACK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 /* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * This file is provided under a dual BSD/GPLv2 license. * * SipHash: a fast short-input PRF * https://131002.net/siphash/ * * This implementation is specifically for SipHash2-4 for a secure PRF * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for * hashtables. */ #ifndef _LINUX_SIPHASH_H #define _LINUX_SIPHASH_H #include <linux/types.h> #include <linux/kernel.h> #define SIPHASH_ALIGNMENT __alignof__(u64) typedef struct { u64 key[2]; } siphash_key_t; static inline bool siphash_key_is_zero(const siphash_key_t *key) { return !(key->key[0] | key->key[1]); } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); u64 siphash_3u64(const u64 a, const u64 b, const u64 c, const siphash_key_t *key); u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, const siphash_key_t *key); u64 siphash_1u32(const u32 a, const siphash_key_t *key); u64 siphash_3u32(const u32 a, const u32 b, const u32 c, const siphash_key_t *key); static inline u64 siphash_2u32(const u32 a, const u32 b, const siphash_key_t *key) { return siphash_1u64((u64)b << 32 | a, key); } static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const siphash_key_t *key) { return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); } static inline u64 ___siphash_aligned(const __le64 *data, size_t len, const siphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return siphash_1u32(le32_to_cpup((const __le32 *)data), key); if (__builtin_constant_p(len) && len == 8) return siphash_1u64(le64_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 16) return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 24) return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 32) return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), le64_to_cpu(data[3]), key); return __siphash_aligned(data, len, key); } /** * siphash - compute 64-bit siphash PRF value * @data: buffer to hash * @size: size of @data * @key: the siphash key */ static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); return ___siphash_aligned(data, len, key); } #define HSIPHASH_ALIGNMENT __alignof__(unsigned long) typedef struct { unsigned long key[2]; } hsiphash_key_t; u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, const hsiphash_key_t *key); u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const hsiphash_key_t *key); static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, const hsiphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return hsiphash_1u32(le32_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 8) return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 12) return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 16) return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), le32_to_cpu(data[3]), key); return __hsiphash_aligned(data, len, key); } /** * hsiphash - compute 32-bit hsiphash PRF value * @data: buffer to hash * @size: size of @data * @key: the hsiphash key */ static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); return ___hsiphash_aligned(data, len, key); } #endif /* _LINUX_SIPHASH_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 // SPDX-License-Identifier: GPL-2.0 /* * Workingset detection * * Copyright (C) 2013 Red Hat, Inc., Johannes Weiner */ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/shmem_fs.h> #include <linux/pagemap.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/swap.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> /* * Double CLOCK lists * * Per node, two clock lists are maintained for file pages: the * inactive and the active list. Freshly faulted pages start out at * the head of the inactive list and page reclaim scans pages from the * tail. Pages that are accessed multiple times on the inactive list * are promoted to the active list, to protect them from reclaim, * whereas active pages are demoted to the inactive list when the * active list grows too big. * * fault ------------------------+ * | * +--------------+ | +-------------+ * reclaim <- | inactive | <-+-- demotion | active | <--+ * +--------------+ +-------------+ | * | | * +-------------- promotion ------------------+ * * * Access frequency and refault distance * * A workload is thrashing when its pages are frequently used but they * are evicted from the inactive list every time before another access * would have promoted them to the active list. * * In cases where the average access distance between thrashing pages * is bigger than the size of memory there is nothing that can be * done - the thrashing set could never fit into memory under any * circumstance. * * However, the average access distance could be bigger than the * inactive list, yet smaller than the size of memory. In this case, * the set could fit into memory if it weren't for the currently * active pages - which may be used more, hopefully less frequently: * * +-memory available to cache-+ * | | * +-inactive------+-active----+ * a b | c d e f g h i | J K L M N | * +---------------+-----------+ * * It is prohibitively expensive to accurately track access frequency * of pages. But a reasonable approximation can be made to measure * thrashing on the inactive list, after which refaulting pages can be * activated optimistically to compete with the existing active pages. * * Approximating inactive page access frequency - Observations: * * 1. When a page is accessed for the first time, it is added to the * head of the inactive list, slides every existing inactive page * towards the tail by one slot, and pushes the current tail page * out of memory. * * 2. When a page is accessed for the second time, it is promoted to * the active list, shrinking the inactive list by one slot. This * also slides all inactive pages that were faulted into the cache * more recently than the activated page towards the tail of the * inactive list. * * Thus: * * 1. The sum of evictions and activations between any two points in * time indicate the minimum number of inactive pages accessed in * between. * * 2. Moving one inactive page N page slots towards the tail of the * list requires at least N inactive page accesses. * * Combining these: * * 1. When a page is finally evicted from memory, the number of * inactive pages accessed while the page was in cache is at least * the number of page slots on the inactive list. * * 2. In addition, measuring the sum of evictions and activations (E) * at the time of a page's eviction, and comparing it to another * reading (R) at the time the page faults back into memory tells * the minimum number of accesses while the page was not cached. * This is called the refault distance. * * Because the first access of the page was the fault and the second * access the refault, we combine the in-cache distance with the * out-of-cache distance to get the complete minimum access distance * of this page: * * NR_inactive + (R - E) * * And knowing the minimum access distance of a page, we can easily * tell if the page would be able to stay in cache assuming all page * slots in the cache were available: * * NR_inactive + (R - E) <= NR_inactive + NR_active * * which can be further simplified to * * (R - E) <= NR_active * * Put into words, the refault distance (out-of-cache) can be seen as * a deficit in inactive list space (in-cache). If the inactive list * had (R - E) more page slots, the page would not have been evicted * in between accesses, but activated instead. And on a full system, * the only thing eating into inactive list space is active pages. * * * Refaulting inactive pages * * All that is known about the active list is that the pages have been * accessed more than once in the past. This means that at any given * time there is actually a good chance that pages on the active list * are no longer in active use. * * So when a refault distance of (R - E) is observed and there are at * least (R - E) active pages, the refaulting page is activated * optimistically in the hope that (R - E) active pages are actually * used less frequently than the refaulting page - or even not used at * all anymore. * * That means if inactive cache is refaulting with a suitable refault * distance, we assume the cache workingset is transitioning and put * pressure on the current active list. * * If this is wrong and demotion kicks in, the pages which are truly * used more frequently will be reactivated while the less frequently * used once will be evicted from memory. * * But if this is right, the stale pages will be pushed out of memory * and the used pages get to stay in cache. * * Refaulting active pages * * If on the other hand the refaulting pages have recently been * deactivated, it means that the active list is no longer protecting * actively used cache from reclaim. The cache is NOT transitioning to * a different workingset; the existing workingset is thrashing in the * space allocated to the page cache. * * * Implementation * * For each node's LRU lists, a counter for inactive evictions and * activations is maintained (node->nonresident_age). * * On eviction, a snapshot of this counter (along with some bits to * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible * refault distance will immediately activate the refaulting page. */ #define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* * Eviction timestamps need to be able to cover the full range of * actionable refaults. However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group * evictions into coarser buckets by shaving off lower timestamp bits. */ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { eviction >>= bucket_order; eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; eviction = (eviction << 1) | workingset; return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp, bool *workingsetp) { unsigned long entry = xa_to_value(shadow); int memcgid, nid; bool workingset; workingset = entry & 1; entry >>= 1; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); entry >>= MEM_CGROUP_ID_SHIFT; *memcgidp = memcgid; *pgdat = NODE_DATA(nid); *evictionp = entry << bucket_order; *workingsetp = workingset; } /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged * @nr_pages: the number of pages to count * * As in-memory pages are aged, non-resident pages need to be aged as * well, in order for the refault distances later on to be comparable * to the in-memory dimensions. This function allows reclaim and LRU * operations to drive the non-resident aging along in parallel. */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages) { /* * Reclaiming a cgroup means reclaiming all its children in a * round-robin fashion. That means that each cgroup has an LRU * order that is composed of the LRU orders of its child * cgroups; and every page has an LRU position not just in the * cgroup that owns it, but in all of that group's ancestors. * * So when the physical inactive list of a leaf cgroup ages, * the virtual inactive lists of all its parents, including * the root cgroup's, age as well. */ do { atomic_long_add(nr_pages, &lruvec->nonresident_age); } while ((lruvec = parent_lruvec(lruvec))); } /** * workingset_eviction - note the eviction of a page from memory * @target_memcg: the cgroup that is causing the reclaim * @page: the page being evicted * * Returns a shadow entry to be stored in @page->mapping->i_pages in place * of the evicted @page so that a later refault can be detected. */ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) { struct pglist_data *pgdat = page_pgdat(page); unsigned long eviction; struct lruvec *lruvec; int memcgid; /* Page is fully exclusive and pins page->mem_cgroup */ VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); workingset_age_nonresident(lruvec, thp_nr_pages(page)); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } /** * workingset_refault - evaluate the refault of a previously evicted page * @page: the freshly allocated replacement page * @shadow: shadow entry of the evicted page * * Calculates and evaluates the refault distance of the previously * evicted page in the context of the node and the memcg whose memory * pressure caused the eviction. */ void workingset_refault(struct page *page, void *shadow) { bool file = page_is_file_lru(page); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; unsigned long workingset_size; struct pglist_data *pgdat; struct mem_cgroup *memcg; unsigned long eviction; struct lruvec *lruvec; unsigned long refault; bool workingset; int memcgid; unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); rcu_read_lock(); /* * Look up the memcg associated with the stored ID. It might * have been deleted since the page's eviction. * * Note that in rare events the ID could have been recycled * for a new cgroup that refaults a shared page. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging * algorithm's job to shake out the minimum access frequency * for the active cache. * * XXX: On !CONFIG_MEMCG, this will always return NULL; it * would be better if the root_mem_cgroup existed in all * configurations instead. */ eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && !eviction_memcg) goto out; eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); /* * Calculate the refault distance * * The unsigned subtraction here gives an accurate distance * across nonresident_age overflows in most cases. There is a * special case: usually, shadow entries have a short lifetime * and are either refaulted or reclaimed along with the inode * before they get too old. But it is not impossible for the * nonresident_age to lap a shadow entry in the field, which * can then result in a false small refault distance, leading * to a false activation should this old entry actually * refault again. However, earlier kernels used to deactivate * unconditionally with *every* reclaim invocation for the * longest time, so the occasional inappropriate activation * leading to pressure on the active list is not a problem. */ refault_distance = (refault - eviction) & EVICTION_MASK; /* * The activation decision for this page is made at the level * where the eviction occurred, as that is where the LRU order * during page reclaim is being determined. * * However, the cgroup that will own the page is the one that * is actually experiencing the refault event. */ memcg = page_memcg(page); lruvec = mem_cgroup_lruvec(memcg, pgdat); inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if * all the memory was available to the workingset. Whether * workingset competition needs to consider anon or not depends * on having swap. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); } if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); if (file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_ANON); } } if (refault_distance > workingset_size) goto out; SetPageActive(page); workingset_age_nonresident(lruvec, thp_nr_pages(page)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); /* Page was active prior to eviction */ if (workingset) { SetPageWorkingset(page); /* XXX: Move to lru_cache_add() when it supports new vs putback */ spin_lock_irq(&page_pgdat(page)->lru_lock); lru_note_cost_page(page); spin_unlock_irq(&page_pgdat(page)->lru_lock); inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: rcu_read_unlock(); } /** * workingset_activation - note a page activation * @page: page that is being activated */ void workingset_activation(struct page *page) { struct mem_cgroup *memcg; struct lruvec *lruvec; rcu_read_lock(); /* * Filter non-memcg pages here, e.g. unmap can call * mark_page_accessed() on VDSO pages. * * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ memcg = page_memcg_rcu(page); if (!mem_cgroup_disabled() && !memcg) goto out; lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); } /* * Shadow entries reflect the share of the working set that does not * fit into memory, so their number depends on the access pattern of * the workload. In most cases, they will refault or get reclaimed * along with the inode, but a (malicious) workload that streams * through files with a total size several times that of available * memory, while preventing the inodes from being reclaimed, can * create excessive amounts of shadow nodes. To keep a lid on this, * track shadow nodes and reclaim them when they grow way past the * point where they would still be useful. */ static struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. * * Avoid acquiring the list_lru lock when the nodes are * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */ if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { list_lru_add(&shadow_nodes, &node->private_list); __inc_lruvec_slab_state(node, WORKINGSET_NODES); } } else { if (!list_empty(&node->private_list)) { list_lru_del(&shadow_nodes, &node->private_list); __dec_lruvec_slab_state(node, WORKINGSET_NODES); } } } static unsigned long count_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long max_nodes; unsigned long nodes; unsigned long pages; nodes = list_lru_shrink_count(&shadow_nodes, sc); /* * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. * * The size of the active list converges toward 100% of * overall page cache as memory grows, with only a tiny * inactive list. Assume the total cache size for that. * * Nodes might be sparsely populated, with only one shadow * entry in the extreme case. Obviously, we cannot keep one * node for every eligible shadow entry, so compromise on a * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG if (sc->memcg) { struct lruvec *lruvec; int i; lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); for (pages = 0, i = 0; i < NR_LRU_LISTS; i++) pages += lruvec_page_state_local(lruvec, NR_LRU_BASE + i); pages += lruvec_page_state_local( lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else #endif pages = node_present_pages(sc->nid); max_nodes = pages >> (XA_CHUNK_SHIFT - 3); if (!nodes) return SHRINK_EMPTY; if (nodes <= max_nodes) return 0; return nodes - max_nodes; } static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); struct address_space *mapping; int ret; /* * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any * address_space that has nodes on the LRU. * * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the lru_lock. */ mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { spin_unlock_irq(lru_lock); ret = LRU_RETRY; goto out; } list_lru_isolate(lru, item); __dec_lruvec_slab_state(node, WORKINGSET_NODES); spin_unlock(lru_lock); /* * The nodes should only contain one or more shadow entries, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; mapping->nrexceptional -= node->nr_values; xa_delete_node(node, workingset_update_node); __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); out_invalid: xa_unlock_irq(&mapping->i_pages); ret = LRU_REMOVED_RETRY; out: cond_resched(); spin_lock_irq(lru_lock); return ret; } static unsigned long scan_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { /* list_lru lock nests inside the IRQ-safe i_pages lock */ return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate, NULL); } static struct shrinker workingset_shadow_shrinker = { .count_objects = count_shadow_nodes, .scan_objects = scan_shadow_nodes, .seeks = 0, /* ->count reports only fully expendable nodes */ .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, }; /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. */ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { unsigned int timestamp_bits; unsigned int max_order; int ret; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* * Calculate the eviction bucket size to cover the longest * actionable refault distance, which is currently half of * memory (totalram_pages/2). However, memory hotplug may add * some more pages at runtime, so keep working with up to * double the initial memory by using totalram_pages as-is. */ timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; max_order = fls_long(totalram_pages() - 1); if (max_order > timestamp_bits) bucket_order = max_order - timestamp_bits; pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); ret = prealloc_shrinker(&workingset_shadow_shrinker); if (ret) goto err; ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key, &workingset_shadow_shrinker); if (ret) goto err_list_lru; register_shrinker_prepared(&workingset_shadow_shrinker); return 0; err_list_lru: free_prealloced_shrinker(&workingset_shadow_shrinker); err: return ret; } module_init(workingset_init);
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 // SPDX-License-Identifier: GPL-2.0 /* * Interface between ext4 and JBD */ #include "ext4_jbd2.h" #include <trace/events/ext4.h> int ext4_inode_journal_mode(struct inode *inode) { if (EXT4_JOURNAL(inode) == NULL) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ /* We do not support data journalling with delayed allocation */ if (!S_ISREG(inode->i_mode) || ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) || test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && !test_opt(inode->i_sb, DELALLOC))) { /* We do not support data journalling for encrypted data */ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ } if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ BUG(); } /* Just increment the non-pointer handle value */ static handle_t *ext4_get_nojournal(void) { handle_t *handle = current->journal_info; unsigned long ref_cnt = (unsigned long)handle; BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); ref_cnt++; handle = (handle_t *)ref_cnt; current->journal_info = handle; return handle; } /* Decrement the non-pointer handle value */ static void ext4_put_nojournal(handle_t *handle) { unsigned long ref_cnt = (unsigned long)handle; BUG_ON(ref_cnt == 0); ref_cnt--; handle = (handle_t *)ref_cnt; current->journal_info = handle; } /* * Wrappers for jbd2_journal_start/end. */ static int ext4_journal_check_start(struct super_block *sb) { journal_t *journal; might_sleep(); if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return -EIO; if (sb_rdonly(sb)) return -EROFS; WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; /* * Special case here: if the journal has aborted behind our * backs (eg. EIO in the commit thread), then we still need to * take the FS itself readonly cleanly. */ if (journal && is_journal_aborted(journal)) { ext4_abort(sb, -journal->j_errno, "Detected aborted journal"); return -EROFS; } return 0; } handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds) { journal_t *journal; int err; trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds, _RET_IP_); err = ext4_journal_check_start(sb); if (err < 0) return ERR_PTR(err); journal = EXT4_SB(sb)->s_journal; if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return ext4_get_nojournal(); return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds, GFP_NOFS, type, line); } int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) { struct super_block *sb; int err; int rc; if (!ext4_handle_valid(handle)) { ext4_put_nojournal(handle); return 0; } err = handle->h_err; if (!handle->h_transaction) { rc = jbd2_journal_stop(handle); return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; rc = jbd2_journal_stop(handle); if (!err) err = rc; if (err) __ext4_std_error(sb, where, line, err); return err; } handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, int type) { struct super_block *sb; int err; if (!ext4_handle_valid(handle)) return ext4_get_nojournal(); sb = handle->h_journal->j_private; trace_ext4_journal_start_reserved(sb, jbd2_handle_buffer_credits(handle), _RET_IP_); err = ext4_journal_check_start(sb); if (err < 0) { jbd2_journal_free_reserved(handle); return ERR_PTR(err); } err = jbd2_journal_start_reserved(handle, type, line); if (err < 0) return ERR_PTR(err); return handle; } int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, int extend_cred, int revoke_cred) { if (!ext4_handle_valid(handle)) return 0; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; extend_cred = max(0, extend_cred - jbd2_handle_buffer_credits(handle)); revoke_cred = max(0, revoke_cred - handle->h_revoke_credits); return ext4_journal_extend(handle, extend_cred, revoke_cred); } static void ext4_journal_abort_handle(const char *caller, unsigned int line, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err) { char nbuf[16]; const char *errstr = ext4_decode_error(NULL, err, nbuf); BUG_ON(!ext4_handle_valid(handle)); if (bh) BUFFER_TRACE(bh, "abort"); if (!handle->h_err) handle->h_err = err; if (is_handle_aborted(handle)) return; printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", caller, line, errstr, err_fn); jbd2_journal_abort_handle(handle); } static void ext4_check_bdev_write_error(struct super_block *sb) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; struct ext4_sb_info *sbi = EXT4_SB(sb); int err; /* * If the block device has write error flag, it may have failed to * async write out metadata buffers in the background. In this case, * we could read old data from disk and write it out again, which * may lead to on-disk filesystem inconsistency. */ if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) { spin_lock(&sbi->s_bdev_wb_lock); err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err); spin_unlock(&sbi->s_bdev_wb_lock); if (err) ext4_error_err(sb, -err, "Error while async write back metadata"); } } int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh) { int err = 0; might_sleep(); if (bh->b_bdev->bd_super) ext4_check_bdev_write_error(bh->b_bdev->bd_super); if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); } return err; } /* * The ext4 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. * * If the handle isn't valid we're not journaling, but we still need to * call into ext4_journal_revoke() to put the buffer head. */ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; might_sleep(); trace_ext4_forget(inode, is_metadata, blocknr); BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " "data mode %x\n", bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); /* In the no journal case, we can just do a bforget and return */ if (!ext4_handle_valid(handle)) { bforget(bh); return 0; } /* Never use the revoke function if we are doing full data * journaling: there is no need to, and a V1 superblock won't * support it. Otherwise, only skip the revoke on un-journaled * data blocks. */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || (!is_metadata && !ext4_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call jbd2_journal_forget"); err = jbd2_journal_forget(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); return err; } return 0; } /* * data!=journal && (is_metadata || should_journal_data(inode)) */ BUFFER_TRACE(bh, "call jbd2_journal_revoke"); err = jbd2_journal_revoke(handle, blocknr, bh); if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); __ext4_abort(inode->i_sb, where, line, -err, "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); return err; } int __ext4_journal_get_create_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh) { int err = 0; if (ext4_handle_valid(handle)) { err = jbd2_journal_get_create_access(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); } return err; } int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh) { int err = 0; might_sleep(); set_buffer_meta(bh); set_buffer_prio(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); /* Errors can only happen due to aborted journal or a nasty bug */ if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); if (inode == NULL) { pr_err("EXT4: jbd2_journal_dirty_metadata " "failed: handle type %u started at " "line %u, credits %u/%u, errcode %d", handle->h_type, handle->h_line_no, handle->h_requested_credits, jbd2_handle_buffer_credits(handle), err); return err; } ext4_error_inode(inode, where, line, bh->b_blocknr, "journal_dirty_metadata failed: " "handle type %u started at line %u, " "credits %u/%u, errcode %d", handle->h_type, handle->h_line_no, handle->h_requested_credits, jbd2_handle_buffer_credits(handle), err); } } else { set_buffer_uptodate(bh); if (inode) mark_buffer_dirty_inode(bh, inode); else mark_buffer_dirty(bh); if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { ext4_error_inode_err(inode, where, line, bh->b_blocknr, EIO, "IO error syncing itable block"); err = -EIO; } } } return err; } int __ext4_handle_dirty_super(const char *where, unsigned int line, handle_t *handle, struct super_block *sb) { struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); } else mark_buffer_dirty(bh); return err; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UACCESS_64_H #define _ASM_X86_UACCESS_64_H /* * User space memory access functions */ #include <linux/compiler.h> #include <linux/lockdep.h> #include <linux/kasan-checks.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> /* * Copy To/From Userspace */ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_unrolled(void *to, const void *from, unsigned len); static __always_inline __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len) { unsigned ret; /* * If CPU has ERMS feature, use copy_user_enhanced_fast_string. * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. * Otherwise, use copy_user_generic_unrolled. */ alternative_call_2(copy_user_generic_unrolled, copy_user_generic_string, X86_FEATURE_REP_GOOD, copy_user_enhanced_fast_string, X86_FEATURE_ERMS, ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), "=d" (len)), "1" (to), "2" (from), "3" (len) : "memory", "rcx", "r8", "r9", "r10", "r11"); return ret; } static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { return copy_user_generic((__force void *)dst, src, size); } static __always_inline __must_check unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) { return copy_user_generic((__force void *)dst, (__force void *)src, size); } extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 0); } static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); return __copy_user_flushcache(dst, src, size); } #endif /* _ASM_X86_UACCESS_64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 /* SPDX-License-Identifier: GPL-2.0 */ /* * Statically sized hash table implementation * (C) 2012 Sasha Levin <levinsasha928@gmail.com> */ #ifndef _LINUX_HASHTABLE_H #define _LINUX_HASHTABLE_H #include <linux/list.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/hash.h> #include <linux/rculist.h> #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] __read_mostly = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] #define HASH_SIZE(name) (ARRAY_SIZE(name)) #define HASH_BITS(name) ilog2(HASH_SIZE(name)) /* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ #define hash_min(val, bits) \ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) static inline void __hash_init(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) INIT_HLIST_HEAD(&ht[i]); } /** * hash_init - initialize a hash table * @hashtable: hashtable to be initialized * * Calculates the size of the hashtable from the given parameter, otherwise * same as hash_init_size. * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) /** * hash_add - add an object to a hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add(hashtable, node, key) \ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_add_rcu - add an object to a rcu enabled hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add_rcu(hashtable, node, key) \ hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_hashed - check whether an object is in any hashtable * @node: the &struct hlist_node of the object to be checked */ static inline bool hash_hashed(struct hlist_node *node) { return !hlist_unhashed(node); } static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) if (!hlist_empty(&ht[i])) return false; return true; } /** * hash_empty - check whether a hashtable is empty * @hashtable: hashtable to check * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) /** * hash_del - remove an object from a hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del(struct hlist_node *node) { hlist_del_init(node); } /** * hash_del_rcu - remove an object from a rcu enabled hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del_rcu(struct hlist_node *node) { hlist_del_init_rcu(node); } /** * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_rcu(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_safe(name, bkt, tmp, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the * same bucket in an rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over * * This is the same as hash_for_each_possible_rcu() except that it does * not do any RCU debugging or tracing. */ #define hash_for_each_possible_rcu_notrace(name, obj, member, key) \ hlist_for_each_entry_rcu_notrace(obj, \ &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @tmp: a &struct hlist_node used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_safe(name, obj, tmp, member, key) \ hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 /* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/mac80211/trace.h */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac802154 #if !defined(__MAC802154_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __MAC802154_DRIVER_TRACE #include <linux/tracepoint.h> #include <net/mac802154.h> #include "ieee802154_i.h" #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME) #define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \ wpan_phy_name(local->hw.phy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wpan_phy_name #define CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ __field(enum nl802154_cca_opts, cca_opt) #define CCA_ASSIGN \ do { \ (__entry->cca_mode) = cca->mode; \ (__entry->cca_opt) = cca->opt; \ } while (0) #define CCA_PR_FMT "cca_mode: %d, cca_opt: %d" #define CCA_PR_ARG __entry->cca_mode, __entry->cca_opt #define BOOL_TO_STR(bo) (bo) ? "true" : "false" /* Tracing for driver callbacks */ DECLARE_EVENT_CLASS(local_only_evt4, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); DEFINE_EVENT(local_only_evt4, 802154_drv_return_void, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); TRACE_EVENT(802154_drv_return_int, TP_PROTO(struct ieee802154_local *local, int ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT ", returned: %d", LOCAL_PR_ARG, __entry->ret) ); DEFINE_EVENT(local_only_evt4, 802154_drv_start, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt4, 802154_drv_stop, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); TRACE_EVENT(802154_drv_set_channel, TP_PROTO(struct ieee802154_local *local, u8 page, u8 channel), TP_ARGS(local, page, channel), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, page) __field(u8, channel) ), TP_fast_assign( LOCAL_ASSIGN; __entry->page = page; __entry->channel = channel; ), TP_printk(LOCAL_PR_FMT ", page: %d, channel: %d", LOCAL_PR_ARG, __entry->page, __entry->channel) ); TRACE_EVENT(802154_drv_set_cca_mode, TP_PROTO(struct ieee802154_local *local, const struct wpan_phy_cca *cca), TP_ARGS(local, cca), TP_STRUCT__entry( LOCAL_ENTRY CCA_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; CCA_ASSIGN; ), TP_printk(LOCAL_PR_FMT ", " CCA_PR_FMT, LOCAL_PR_ARG, CCA_PR_ARG) ); TRACE_EVENT(802154_drv_set_cca_ed_level, TP_PROTO(struct ieee802154_local *local, s32 mbm), TP_ARGS(local, mbm), TP_STRUCT__entry( LOCAL_ENTRY __field(s32, mbm) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mbm = mbm; ), TP_printk(LOCAL_PR_FMT ", ed level: %d", LOCAL_PR_ARG, __entry->mbm) ); TRACE_EVENT(802154_drv_set_tx_power, TP_PROTO(struct ieee802154_local *local, s32 power), TP_ARGS(local, power), TP_STRUCT__entry( LOCAL_ENTRY __field(s32, power) ), TP_fast_assign( LOCAL_ASSIGN; __entry->power = power; ), TP_printk(LOCAL_PR_FMT ", mbm: %d", LOCAL_PR_ARG, __entry->power) ); TRACE_EVENT(802154_drv_set_lbt_mode, TP_PROTO(struct ieee802154_local *local, bool mode), TP_ARGS(local, mode), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, mode) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mode = mode; ), TP_printk(LOCAL_PR_FMT ", lbt mode: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->mode)) ); TRACE_EVENT(802154_drv_set_short_addr, TP_PROTO(struct ieee802154_local *local, __le16 short_addr), TP_ARGS(local, short_addr), TP_STRUCT__entry( LOCAL_ENTRY __field(__le16, short_addr) ), TP_fast_assign( LOCAL_ASSIGN; __entry->short_addr = short_addr; ), TP_printk(LOCAL_PR_FMT ", short addr: 0x%04x", LOCAL_PR_ARG, le16_to_cpu(__entry->short_addr)) ); TRACE_EVENT(802154_drv_set_pan_id, TP_PROTO(struct ieee802154_local *local, __le16 pan_id), TP_ARGS(local, pan_id), TP_STRUCT__entry( LOCAL_ENTRY __field(__le16, pan_id) ), TP_fast_assign( LOCAL_ASSIGN; __entry->pan_id = pan_id; ), TP_printk(LOCAL_PR_FMT ", pan id: 0x%04x", LOCAL_PR_ARG, le16_to_cpu(__entry->pan_id)) ); TRACE_EVENT(802154_drv_set_extended_addr, TP_PROTO(struct ieee802154_local *local, __le64 extended_addr), TP_ARGS(local, extended_addr), TP_STRUCT__entry( LOCAL_ENTRY __field(__le64, extended_addr) ), TP_fast_assign( LOCAL_ASSIGN; __entry->extended_addr = extended_addr; ), TP_printk(LOCAL_PR_FMT ", extended addr: 0x%llx", LOCAL_PR_ARG, le64_to_cpu(__entry->extended_addr)) ); TRACE_EVENT(802154_drv_set_pan_coord, TP_PROTO(struct ieee802154_local *local, bool is_coord), TP_ARGS(local, is_coord), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, is_coord) ), TP_fast_assign( LOCAL_ASSIGN; __entry->is_coord = is_coord; ), TP_printk(LOCAL_PR_FMT ", is_coord: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->is_coord)) ); TRACE_EVENT(802154_drv_set_csma_params, TP_PROTO(struct ieee802154_local *local, u8 min_be, u8 max_be, u8 max_csma_backoffs), TP_ARGS(local, min_be, max_be, max_csma_backoffs), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, min_be) __field(u8, max_be) __field(u8, max_csma_backoffs) ), TP_fast_assign( LOCAL_ASSIGN, __entry->min_be = min_be; __entry->max_be = max_be; __entry->max_csma_backoffs = max_csma_backoffs; ), TP_printk(LOCAL_PR_FMT ", min be: %d, max be: %d, max csma backoffs: %d", LOCAL_PR_ARG, __entry->min_be, __entry->max_be, __entry->max_csma_backoffs) ); TRACE_EVENT(802154_drv_set_max_frame_retries, TP_PROTO(struct ieee802154_local *local, s8 max_frame_retries), TP_ARGS(local, max_frame_retries), TP_STRUCT__entry( LOCAL_ENTRY __field(s8, max_frame_retries) ), TP_fast_assign( LOCAL_ASSIGN; __entry->max_frame_retries = max_frame_retries; ), TP_printk(LOCAL_PR_FMT ", max frame retries: %d", LOCAL_PR_ARG, __entry->max_frame_retries) ); TRACE_EVENT(802154_drv_set_promiscuous_mode, TP_PROTO(struct ieee802154_local *local, bool on), TP_ARGS(local, on), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, on) ), TP_fast_assign( LOCAL_ASSIGN; __entry->on = on; ), TP_printk(LOCAL_PR_FMT ", promiscuous mode: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->on)) ); #endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ /** * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif
1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 // SPDX-License-Identifier: GPL-2.0-only /* * Implementation of the policy database. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ /* * Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com> * * Support for enhanced MLS infrastructure. * * Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com> * * Added conditional policy language extensions * * Updated: Hewlett-Packard <paul@paul-moore.com> * * Added support for the policy capability bitmap * * Update: Mellanox Techonologies * * Added Infiniband support * * Copyright (C) 2016 Mellanox Techonologies * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * Copyright (C) 2003 - 2004 Tresys Technology, LLC */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/audit.h> #include "security.h" #include "policydb.h" #include "conditional.h" #include "mls.h" #include "services.h" #define _DEBUG_HASHES #ifdef DEBUG_HASHES static const char *symtab_name[SYM_NUM] = { "common prefixes", "classes", "roles", "types", "users", "bools", "levels", "categories", }; #endif struct policydb_compat_info { int version; int sym_num; int ocon_num; }; /* These need to be updated if SYM_NUM or OCON_NUM changes */ static struct policydb_compat_info policydb_compat[] = { { .version = POLICYDB_VERSION_BASE, .sym_num = SYM_NUM - 3, .ocon_num = OCON_NUM - 3, }, { .version = POLICYDB_VERSION_BOOL, .sym_num = SYM_NUM - 2, .ocon_num = OCON_NUM - 3, }, { .version = POLICYDB_VERSION_IPV6, .sym_num = SYM_NUM - 2, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_NLCLASS, .sym_num = SYM_NUM - 2, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_MLS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_AVTAB, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_RANGETRANS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_POLCAP, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_PERMISSIVE, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_BOUNDARY, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_FILENAME_TRANS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_ROLETRANS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_NEW_OBJECT_DEFAULTS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_DEFAULT_TYPE, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_CONSTRAINT_NAMES, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_XPERMS_IOCTL, .sym_num = SYM_NUM, .ocon_num = OCON_NUM - 2, }, { .version = POLICYDB_VERSION_INFINIBAND, .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, { .version = POLICYDB_VERSION_GLBLUB, .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, { .version = POLICYDB_VERSION_COMP_FTRANS, .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) { int i; struct policydb_compat_info *info = NULL; for (i = 0; i < ARRAY_SIZE(policydb_compat); i++) { if (policydb_compat[i].version == version) { info = &policydb_compat[i]; break; } } return info; } /* * The following *_destroy functions are used to * free any memory allocated for each kind of * symbol data in the policy database. */ static int perm_destroy(void *key, void *datum, void *p) { kfree(key); kfree(datum); return 0; } static int common_destroy(void *key, void *datum, void *p) { struct common_datum *comdatum; kfree(key); if (datum) { comdatum = datum; hashtab_map(&comdatum->permissions.table, perm_destroy, NULL); hashtab_destroy(&comdatum->permissions.table); } kfree(datum); return 0; } static void constraint_expr_destroy(struct constraint_expr *expr) { if (expr) { ebitmap_destroy(&expr->names); if (expr->type_names) { ebitmap_destroy(&expr->type_names->types); ebitmap_destroy(&expr->type_names->negset); kfree(expr->type_names); } kfree(expr); } } static int cls_destroy(void *key, void *datum, void *p) { struct class_datum *cladatum; struct constraint_node *constraint, *ctemp; struct constraint_expr *e, *etmp; kfree(key); if (datum) { cladatum = datum; hashtab_map(&cladatum->permissions.table, perm_destroy, NULL); hashtab_destroy(&cladatum->permissions.table); constraint = cladatum->constraints; while (constraint) { e = constraint->expr; while (e) { etmp = e; e = e->next; constraint_expr_destroy(etmp); } ctemp = constraint; constraint = constraint->next; kfree(ctemp); } constraint = cladatum->validatetrans; while (constraint) { e = constraint->expr; while (e) { etmp = e; e = e->next; constraint_expr_destroy(etmp); } ctemp = constraint; constraint = constraint->next; kfree(ctemp); } kfree(cladatum->comkey); } kfree(datum); return 0; } static int role_destroy(void *key, void *datum, void *p) { struct role_datum *role; kfree(key); if (datum) { role = datum; ebitmap_destroy(&role->dominates); ebitmap_destroy(&role->types); } kfree(datum); return 0; } static int type_destroy(void *key, void *datum, void *p) { kfree(key); kfree(datum); return 0; } static int user_destroy(void *key, void *datum, void *p) { struct user_datum *usrdatum; kfree(key); if (datum) { usrdatum = datum; ebitmap_destroy(&usrdatum->roles); ebitmap_destroy(&usrdatum->range.level[0].cat); ebitmap_destroy(&usrdatum->range.level[1].cat); ebitmap_destroy(&usrdatum->dfltlevel.cat); } kfree(datum); return 0; } static int sens_destroy(void *key, void *datum, void *p) { struct level_datum *levdatum; kfree(key); if (datum) { levdatum = datum; if (levdatum->level) ebitmap_destroy(&levdatum->level->cat); kfree(levdatum->level); } kfree(datum); return 0; } static int cat_destroy(void *key, void *datum, void *p) { kfree(key); kfree(datum); return 0; } static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) = { common_destroy, cls_destroy, role_destroy, type_destroy, user_destroy, cond_destroy_bool, sens_destroy, cat_destroy, }; static int filenametr_destroy(void *key, void *datum, void *p) { struct filename_trans_key *ft = key; struct filename_trans_datum *next, *d = datum; kfree(ft->name); kfree(key); do { ebitmap_destroy(&d->stypes); next = d->next; kfree(d); d = next; } while (unlikely(d)); cond_resched(); return 0; } static int range_tr_destroy(void *key, void *datum, void *p) { struct mls_range *rt = datum; kfree(key); ebitmap_destroy(&rt->level[0].cat); ebitmap_destroy(&rt->level[1].cat); kfree(datum); cond_resched(); return 0; } static int role_tr_destroy(void *key, void *datum, void *p) { kfree(key); kfree(datum); return 0; } static void ocontext_destroy(struct ocontext *c, int i) { if (!c) return; context_destroy(&c->context[0]); context_destroy(&c->context[1]); if (i == OCON_ISID || i == OCON_FS || i == OCON_NETIF || i == OCON_FSUSE) kfree(c->u.name); kfree(c); } /* * Initialize the role table. */ static int roles_init(struct policydb *p) { char *key = NULL; int rc; struct role_datum *role; role = kzalloc(sizeof(*role), GFP_KERNEL); if (!role) return -ENOMEM; rc = -EINVAL; role->value = ++p->p_roles.nprim; if (role->value != OBJECT_R_VAL) goto out; rc = -ENOMEM; key = kstrdup(OBJECT_R, GFP_KERNEL); if (!key) goto out; rc = symtab_insert(&p->p_roles, key, role); if (rc) goto out; return 0; out: kfree(key); kfree(role); return rc; } static u32 filenametr_hash(const void *k) { const struct filename_trans_key *ft = k; unsigned long hash; unsigned int byte_num; unsigned char focus; hash = ft->ttype ^ ft->tclass; byte_num = 0; while ((focus = ft->name[byte_num++])) hash = partial_name_hash(focus, hash); return hash; } static int filenametr_cmp(const void *k1, const void *k2) { const struct filename_trans_key *ft1 = k1; const struct filename_trans_key *ft2 = k2; int v; v = ft1->ttype - ft2->ttype; if (v) return v; v = ft1->tclass - ft2->tclass; if (v) return v; return strcmp(ft1->name, ft2->name); } static const struct hashtab_key_params filenametr_key_params = { .hash = filenametr_hash, .cmp = filenametr_cmp, }; struct filename_trans_datum *policydb_filenametr_search( struct policydb *p, struct filename_trans_key *key) { return hashtab_search(&p->filename_trans, key, filenametr_key_params); } static u32 rangetr_hash(const void *k) { const struct range_trans *key = k; return key->source_type + (key->target_type << 3) + (key->target_class << 5); } static int rangetr_cmp(const void *k1, const void *k2) { const struct range_trans *key1 = k1, *key2 = k2; int v; v = key1->source_type - key2->source_type; if (v) return v; v = key1->target_type - key2->target_type; if (v) return v; v = key1->target_class - key2->target_class; return v; } static const struct hashtab_key_params rangetr_key_params = { .hash = rangetr_hash, .cmp = rangetr_cmp, }; struct mls_range *policydb_rangetr_search(struct policydb *p, struct range_trans *key) { return hashtab_search(&p->range_tr, key, rangetr_key_params); } static u32 role_trans_hash(const void *k) { const struct role_trans_key *key = k; return key->role + (key->type << 3) + (key->tclass << 5); } static int role_trans_cmp(const void *k1, const void *k2) { const struct role_trans_key *key1 = k1, *key2 = k2; int v; v = key1->role - key2->role; if (v) return v; v = key1->type - key2->type; if (v) return v; return key1->tclass - key2->tclass; } static const struct hashtab_key_params roletr_key_params = { .hash = role_trans_hash, .cmp = role_trans_cmp, }; struct role_trans_datum *policydb_roletr_search(struct policydb *p, struct role_trans_key *key) { return hashtab_search(&p->role_tr, key, roletr_key_params); } /* * Initialize a policy database structure. */ static void policydb_init(struct policydb *p) { memset(p, 0, sizeof(*p)); avtab_init(&p->te_avtab); cond_policydb_init(p); ebitmap_init(&p->filename_trans_ttypes); ebitmap_init(&p->policycaps); ebitmap_init(&p->permissive_map); } /* * The following *_index functions are used to * define the val_to_name and val_to_struct arrays * in a policy database structure. The val_to_name * arrays are used when converting security context * structures into string representations. The * val_to_struct arrays are used when the attributes * of a class, role, or user are needed. */ static int common_index(void *key, void *datum, void *datap) { struct policydb *p; struct common_datum *comdatum; comdatum = datum; p = datap; if (!comdatum->value || comdatum->value > p->p_commons.nprim) return -EINVAL; p->sym_val_to_name[SYM_COMMONS][comdatum->value - 1] = key; return 0; } static int class_index(void *key, void *datum, void *datap) { struct policydb *p; struct class_datum *cladatum; cladatum = datum; p = datap; if (!cladatum->value || cladatum->value > p->p_classes.nprim) return -EINVAL; p->sym_val_to_name[SYM_CLASSES][cladatum->value - 1] = key; p->class_val_to_struct[cladatum->value - 1] = cladatum; return 0; } static int role_index(void *key, void *datum, void *datap) { struct policydb *p; struct role_datum *role; role = datum; p = datap; if (!role->value || role->value > p->p_roles.nprim || role->bounds > p->p_roles.nprim) return -EINVAL; p->sym_val_to_name[SYM_ROLES][role->value - 1] = key; p->role_val_to_struct[role->value - 1] = role; return 0; } static int type_index(void *key, void *datum, void *datap) { struct policydb *p; struct type_datum *typdatum; typdatum = datum; p = datap; if (typdatum->primary) { if (!typdatum->value || typdatum->value > p->p_types.nprim || typdatum->bounds > p->p_types.nprim) return -EINVAL; p->sym_val_to_name[SYM_TYPES][typdatum->value - 1] = key; p->type_val_to_struct[typdatum->value - 1] = typdatum; } return 0; } static int user_index(void *key, void *datum, void *datap) { struct policydb *p; struct user_datum *usrdatum; usrdatum = datum; p = datap; if (!usrdatum->value || usrdatum->value > p->p_users.nprim || usrdatum->bounds > p->p_users.nprim) return -EINVAL; p->sym_val_to_name[SYM_USERS][usrdatum->value - 1] = key; p->user_val_to_struct[usrdatum->value - 1] = usrdatum; return 0; } static int sens_index(void *key, void *datum, void *datap) { struct policydb *p; struct level_datum *levdatum; levdatum = datum; p = datap; if (!levdatum->isalias) { if (!levdatum->level->sens || levdatum->level->sens > p->p_levels.nprim) return -EINVAL; p->sym_val_to_name[SYM_LEVELS][levdatum->level->sens - 1] = key; } return 0; } static int cat_index(void *key, void *datum, void *datap) { struct policydb *p; struct cat_datum *catdatum; catdatum = datum; p = datap; if (!catdatum->isalias) { if (!catdatum->value || catdatum->value > p->p_cats.nprim) return -EINVAL; p->sym_val_to_name[SYM_CATS][catdatum->value - 1] = key; } return 0; } static int (*index_f[SYM_NUM]) (void *key, void *datum, void *datap) = { common_index, class_index, role_index, type_index, user_index, cond_index_bool, sens_index, cat_index, }; #ifdef DEBUG_HASHES static void hash_eval(struct hashtab *h, const char *hash_name) { struct hashtab_info info; hashtab_stat(h, &info); pr_debug("SELinux: %s: %d entries and %d/%d buckets used, longest chain length %d\n", hash_name, h->nel, info.slots_used, h->size, info.max_chain_len); } static void symtab_hash_eval(struct symtab *s) { int i; for (i = 0; i < SYM_NUM; i++) hash_eval(&s[i].table, symtab_name[i]); } #else static inline void hash_eval(struct hashtab *h, char *hash_name) { } #endif /* * Define the other val_to_name and val_to_struct arrays * in a policy database structure. * * Caller must clean up on failure. */ static int policydb_index(struct policydb *p) { int i, rc; if (p->mls_enabled) pr_debug("SELinux: %d users, %d roles, %d types, %d bools, %d sens, %d cats\n", p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim, p->p_bools.nprim, p->p_levels.nprim, p->p_cats.nprim); else pr_debug("SELinux: %d users, %d roles, %d types, %d bools\n", p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim, p->p_bools.nprim); pr_debug("SELinux: %d classes, %d rules\n", p->p_classes.nprim, p->te_avtab.nel); #ifdef DEBUG_HASHES avtab_hash_eval(&p->te_avtab, "rules"); symtab_hash_eval(p->symtab); #endif p->class_val_to_struct = kcalloc(p->p_classes.nprim, sizeof(*p->class_val_to_struct), GFP_KERNEL); if (!p->class_val_to_struct) return -ENOMEM; p->role_val_to_struct = kcalloc(p->p_roles.nprim, sizeof(*p->role_val_to_struct), GFP_KERNEL); if (!p->role_val_to_struct) return -ENOMEM; p->user_val_to_struct = kcalloc(p->p_users.nprim, sizeof(*p->user_val_to_struct), GFP_KERNEL); if (!p->user_val_to_struct) return -ENOMEM; p->type_val_to_struct = kvcalloc(p->p_types.nprim, sizeof(*p->type_val_to_struct), GFP_KERNEL); if (!p->type_val_to_struct) return -ENOMEM; rc = cond_init_bool_indexes(p); if (rc) goto out; for (i = 0; i < SYM_NUM; i++) { p->sym_val_to_name[i] = kvcalloc(p->symtab[i].nprim, sizeof(char *), GFP_KERNEL); if (!p->sym_val_to_name[i]) return -ENOMEM; rc = hashtab_map(&p->symtab[i].table, index_f[i], p); if (rc) goto out; } rc = 0; out: return rc; } /* * Free any memory allocated by a policy database structure. */ void policydb_destroy(struct policydb *p) { struct ocontext *c, *ctmp; struct genfs *g, *gtmp; int i; struct role_allow *ra, *lra = NULL; for (i = 0; i < SYM_NUM; i++) { cond_resched(); hashtab_map(&p->symtab[i].table, destroy_f[i], NULL); hashtab_destroy(&p->symtab[i].table); } for (i = 0; i < SYM_NUM; i++) kvfree(p->sym_val_to_name[i]); kfree(p->class_val_to_struct); kfree(p->role_val_to_struct); kfree(p->user_val_to_struct); kvfree(p->type_val_to_struct); avtab_destroy(&p->te_avtab); for (i = 0; i < OCON_NUM; i++) { cond_resched(); c = p->ocontexts[i]; while (c) { ctmp = c; c = c->next; ocontext_destroy(ctmp, i); } p->ocontexts[i] = NULL; } g = p->genfs; while (g) { cond_resched(); kfree(g->fstype); c = g->head; while (c) { ctmp = c; c = c->next; ocontext_destroy(ctmp, OCON_FSUSE); } gtmp = g; g = g->next; kfree(gtmp); } p->genfs = NULL; cond_policydb_destroy(p); hashtab_map(&p->role_tr, role_tr_destroy, NULL); hashtab_destroy(&p->role_tr); for (ra = p->role_allow; ra; ra = ra->next) { cond_resched(); kfree(lra); lra = ra; } kfree(lra); hashtab_map(&p->filename_trans, filenametr_destroy, NULL); hashtab_destroy(&p->filename_trans); hashtab_map(&p->range_tr, range_tr_destroy, NULL); hashtab_destroy(&p->range_tr); if (p->type_attr_map_array) { for (i = 0; i < p->p_types.nprim; i++) ebitmap_destroy(&p->type_attr_map_array[i]); kvfree(p->type_attr_map_array); } ebitmap_destroy(&p->filename_trans_ttypes); ebitmap_destroy(&p->policycaps); ebitmap_destroy(&p->permissive_map); } /* * Load the initial SIDs specified in a policy database * structure into a SID table. */ int policydb_load_isids(struct policydb *p, struct sidtab *s) { struct ocontext *head, *c; int rc; rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); return rc; } head = p->ocontexts[OCON_ISID]; for (c = head; c; c = c->next) { u32 sid = c->sid[0]; const char *name = security_get_initial_sid_context(sid); if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); return -EINVAL; } /* Ignore initial SIDs unused by this kernel. */ if (!name) continue; rc = sidtab_set_initial(s, sid, &c->context[0]); if (rc) { pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); return rc; } } return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) { if (!class || class > p->p_classes.nprim) return 0; return 1; } int policydb_role_isvalid(struct policydb *p, unsigned int role) { if (!role || role > p->p_roles.nprim) return 0; return 1; } int policydb_type_isvalid(struct policydb *p, unsigned int type) { if (!type || type > p->p_types.nprim) return 0; return 1; } /* * Return 1 if the fields in the security context * structure `c' are valid. Return 0 otherwise. */ int policydb_context_isvalid(struct policydb *p, struct context *c) { struct role_datum *role; struct user_datum *usrdatum; if (!c->role || c->role > p->p_roles.nprim) return 0; if (!c->user || c->user > p->p_users.nprim) return 0; if (!c->type || c->type > p->p_types.nprim) return 0; if (c->role != OBJECT_R_VAL) { /* * Role must be authorized for the type. */ role = p->role_val_to_struct[c->role - 1]; if (!role || !ebitmap_get_bit(&role->types, c->type - 1)) /* role may not be associated with type */ return 0; /* * User must be authorized for the role. */ usrdatum = p->user_val_to_struct[c->user - 1]; if (!usrdatum) return 0; if (!ebitmap_get_bit(&usrdatum->roles, c->role - 1)) /* user may not be associated with role */ return 0; } if (!mls_context_isvalid(p, c)) return 0; return 1; } /* * Read a MLS range structure from a policydb binary * representation file. */ static int mls_read_range_helper(struct mls_range *r, void *fp) { __le32 buf[2]; u32 items; int rc; rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; rc = -EINVAL; items = le32_to_cpu(buf[0]); if (items > ARRAY_SIZE(buf)) { pr_err("SELinux: mls: range overflow\n"); goto out; } rc = next_entry(buf, fp, sizeof(u32) * items); if (rc) { pr_err("SELinux: mls: truncated range\n"); goto out; } r->level[0].sens = le32_to_cpu(buf[0]); if (items > 1) r->level[1].sens = le32_to_cpu(buf[1]); else r->level[1].sens = r->level[0].sens; rc = ebitmap_read(&r->level[0].cat, fp); if (rc) { pr_err("SELinux: mls: error reading low categories\n"); goto out; } if (items > 1) { rc = ebitmap_read(&r->level[1].cat, fp); if (rc) { pr_err("SELinux: mls: error reading high categories\n"); goto bad_high; } } else { rc = ebitmap_cpy(&r->level[1].cat, &r->level[0].cat); if (rc) { pr_err("SELinux: mls: out of memory\n"); goto bad_high; } } return 0; bad_high: ebitmap_destroy(&r->level[0].cat); out: return rc; } /* * Read and validate a security context structure * from a policydb binary representation file. */ static int context_read_and_validate(struct context *c, struct policydb *p, void *fp) { __le32 buf[3]; int rc; rc = next_entry(buf, fp, sizeof buf); if (rc) { pr_err("SELinux: context truncated\n"); goto out; } c->user = le32_to_cpu(buf[0]); c->role = le32_to_cpu(buf[1]); c->type = le32_to_cpu(buf[2]); if (p->policyvers >= POLICYDB_VERSION_MLS) { rc = mls_read_range_helper(&c->range, fp); if (rc) { pr_err("SELinux: error reading MLS range of context\n"); goto out; } } rc = -EINVAL; if (!policydb_context_isvalid(p, c)) { pr_err("SELinux: invalid security context\n"); context_destroy(c); goto out; } rc = 0; out: return rc; } /* * The following *_read functions are used to * read the symbol data from a policy database * binary representation file. */ static int str_read(char **strp, gfp_t flags, void *fp, u32 len) { int rc; char *str; if ((len == 0) || (len == (u32)-1)) return -EINVAL; str = kmalloc(len + 1, flags | __GFP_NOWARN); if (!str) return -ENOMEM; rc = next_entry(str, fp, len); if (rc) { kfree(str); return rc; } str[len] = '\0'; *strp = str; return 0; } static int perm_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct perm_datum *perdatum; int rc; __le32 buf[2]; u32 len; perdatum = kzalloc(sizeof(*perdatum), GFP_KERNEL); if (!perdatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) goto bad; len = le32_to_cpu(buf[0]); perdatum->value = le32_to_cpu(buf[1]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; rc = symtab_insert(s, key, perdatum); if (rc) goto bad; return 0; bad: perm_destroy(key, perdatum, NULL); return rc; } static int common_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct common_datum *comdatum; __le32 buf[4]; u32 len, nel; int i, rc; comdatum = kzalloc(sizeof(*comdatum), GFP_KERNEL); if (!comdatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) goto bad; len = le32_to_cpu(buf[0]); comdatum->value = le32_to_cpu(buf[1]); nel = le32_to_cpu(buf[3]); rc = symtab_init(&comdatum->permissions, nel); if (rc) goto bad; comdatum->permissions.nprim = le32_to_cpu(buf[2]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; for (i = 0; i < nel; i++) { rc = perm_read(p, &comdatum->permissions, fp); if (rc) goto bad; } rc = symtab_insert(s, key, comdatum); if (rc) goto bad; return 0; bad: common_destroy(key, comdatum, NULL); return rc; } static void type_set_init(struct type_set *t) { ebitmap_init(&t->types); ebitmap_init(&t->negset); } static int type_set_read(struct type_set *t, void *fp) { __le32 buf[1]; int rc; if (ebitmap_read(&t->types, fp)) return -EINVAL; if (ebitmap_read(&t->negset, fp)) return -EINVAL; rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) return -EINVAL; t->flags = le32_to_cpu(buf[0]); return 0; } static int read_cons_helper(struct policydb *p, struct constraint_node **nodep, int ncons, int allowxtarget, void *fp) { struct constraint_node *c, *lc; struct constraint_expr *e, *le; __le32 buf[3]; u32 nexpr; int rc, i, j, depth; lc = NULL; for (i = 0; i < ncons; i++) { c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; if (lc) lc->next = c; else *nodep = c; rc = next_entry(buf, fp, (sizeof(u32) * 2)); if (rc) return rc; c->permissions = le32_to_cpu(buf[0]); nexpr = le32_to_cpu(buf[1]); le = NULL; depth = -1; for (j = 0; j < nexpr; j++) { e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) return -ENOMEM; if (le) le->next = e; else c->expr = e; rc = next_entry(buf, fp, (sizeof(u32) * 3)); if (rc) return rc; e->expr_type = le32_to_cpu(buf[0]); e->attr = le32_to_cpu(buf[1]); e->op = le32_to_cpu(buf[2]); switch (e->expr_type) { case CEXPR_NOT: if (depth < 0) return -EINVAL; break; case CEXPR_AND: case CEXPR_OR: if (depth < 1) return -EINVAL; depth--; break; case CEXPR_ATTR: if (depth == (CEXPR_MAXDEPTH - 1)) return -EINVAL; depth++; break; case CEXPR_NAMES: if (!allowxtarget && (e->attr & CEXPR_XTARGET)) return -EINVAL; if (depth == (CEXPR_MAXDEPTH - 1)) return -EINVAL; depth++; rc = ebitmap_read(&e->names, fp); if (rc) return rc; if (p->policyvers >= POLICYDB_VERSION_CONSTRAINT_NAMES) { e->type_names = kzalloc(sizeof (*e->type_names), GFP_KERNEL); if (!e->type_names) return -ENOMEM; type_set_init(e->type_names); rc = type_set_read(e->type_names, fp); if (rc) return rc; } break; default: return -EINVAL; } le = e; } if (depth != 0) return -EINVAL; lc = c; } return 0; } static int class_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct class_datum *cladatum; __le32 buf[6]; u32 len, len2, ncons, nel; int i, rc; cladatum = kzalloc(sizeof(*cladatum), GFP_KERNEL); if (!cladatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof(u32)*6); if (rc) goto bad; len = le32_to_cpu(buf[0]); len2 = le32_to_cpu(buf[1]); cladatum->value = le32_to_cpu(buf[2]); nel = le32_to_cpu(buf[4]); rc = symtab_init(&cladatum->permissions, nel); if (rc) goto bad; cladatum->permissions.nprim = le32_to_cpu(buf[3]); ncons = le32_to_cpu(buf[5]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; if (len2) { rc = str_read(&cladatum->comkey, GFP_KERNEL, fp, len2); if (rc) goto bad; rc = -EINVAL; cladatum->comdatum = symtab_search(&p->p_commons, cladatum->comkey); if (!cladatum->comdatum) { pr_err("SELinux: unknown common %s\n", cladatum->comkey); goto bad; } } for (i = 0; i < nel; i++) { rc = perm_read(p, &cladatum->permissions, fp); if (rc) goto bad; } rc = read_cons_helper(p, &cladatum->constraints, ncons, 0, fp); if (rc) goto bad; if (p->policyvers >= POLICYDB_VERSION_VALIDATETRANS) { /* grab the validatetrans rules */ rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto bad; ncons = le32_to_cpu(buf[0]); rc = read_cons_helper(p, &cladatum->validatetrans, ncons, 1, fp); if (rc) goto bad; } if (p->policyvers >= POLICYDB_VERSION_NEW_OBJECT_DEFAULTS) { rc = next_entry(buf, fp, sizeof(u32) * 3); if (rc) goto bad; cladatum->default_user = le32_to_cpu(buf[0]); cladatum->default_role = le32_to_cpu(buf[1]); cladatum->default_range = le32_to_cpu(buf[2]); } if (p->policyvers >= POLICYDB_VERSION_DEFAULT_TYPE) { rc = next_entry(buf, fp, sizeof(u32) * 1); if (rc) goto bad; cladatum->default_type = le32_to_cpu(buf[0]); } rc = symtab_insert(s, key, cladatum); if (rc) goto bad; return 0; bad: cls_destroy(key, cladatum, NULL); return rc; } static int role_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct role_datum *role; int rc, to_read = 2; __le32 buf[3]; u32 len; role = kzalloc(sizeof(*role), GFP_KERNEL); if (!role) return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 3; rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc) goto bad; len = le32_to_cpu(buf[0]); role->value = le32_to_cpu(buf[1]); if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) role->bounds = le32_to_cpu(buf[2]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; rc = ebitmap_read(&role->dominates, fp); if (rc) goto bad; rc = ebitmap_read(&role->types, fp); if (rc) goto bad; if (strcmp(key, OBJECT_R) == 0) { rc = -EINVAL; if (role->value != OBJECT_R_VAL) { pr_err("SELinux: Role %s has wrong value %d\n", OBJECT_R, role->value); goto bad; } rc = 0; goto bad; } rc = symtab_insert(s, key, role); if (rc) goto bad; return 0; bad: role_destroy(key, role, NULL); return rc; } static int type_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct type_datum *typdatum; int rc, to_read = 3; __le32 buf[4]; u32 len; typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL); if (!typdatum) return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 4; rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc) goto bad; len = le32_to_cpu(buf[0]); typdatum->value = le32_to_cpu(buf[1]); if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) { u32 prop = le32_to_cpu(buf[2]); if (prop & TYPEDATUM_PROPERTY_PRIMARY) typdatum->primary = 1; if (prop & TYPEDATUM_PROPERTY_ATTRIBUTE) typdatum->attribute = 1; typdatum->bounds = le32_to_cpu(buf[3]); } else { typdatum->primary = le32_to_cpu(buf[2]); } rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; rc = symtab_insert(s, key, typdatum); if (rc) goto bad; return 0; bad: type_destroy(key, typdatum, NULL); return rc; } /* * Read a MLS level structure from a policydb binary * representation file. */ static int mls_read_level(struct mls_level *lp, void *fp) { __le32 buf[1]; int rc; memset(lp, 0, sizeof(*lp)); rc = next_entry(buf, fp, sizeof buf); if (rc) { pr_err("SELinux: mls: truncated level\n"); return rc; } lp->sens = le32_to_cpu(buf[0]); rc = ebitmap_read(&lp->cat, fp); if (rc) { pr_err("SELinux: mls: error reading level categories\n"); return rc; } return 0; } static int user_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct user_datum *usrdatum; int rc, to_read = 2; __le32 buf[3]; u32 len; usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL); if (!usrdatum) return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 3; rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc) goto bad; len = le32_to_cpu(buf[0]); usrdatum->value = le32_to_cpu(buf[1]); if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) usrdatum->bounds = le32_to_cpu(buf[2]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto bad; rc = ebitmap_read(&usrdatum->roles, fp); if (rc) goto bad; if (p->policyvers >= POLICYDB_VERSION_MLS) { rc = mls_read_range_helper(&usrdatum->range, fp); if (rc) goto bad; rc = mls_read_level(&usrdatum->dfltlevel, fp); if (rc) goto bad; } rc = symtab_insert(s, key, usrdatum); if (rc) goto bad; return 0; bad: user_destroy(key, usrdatum, NULL); return rc; } static int sens_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct level_datum *levdatum; int rc; __le32 buf[2]; u32 len; levdatum = kzalloc(sizeof(*levdatum), GFP_ATOMIC); if (!levdatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) goto bad; len = le32_to_cpu(buf[0]); levdatum->isalias = le32_to_cpu(buf[1]); rc = str_read(&key, GFP_ATOMIC, fp, len); if (rc) goto bad; rc = -ENOMEM; levdatum->level = kmalloc(sizeof(*levdatum->level), GFP_ATOMIC); if (!levdatum->level) goto bad; rc = mls_read_level(levdatum->level, fp); if (rc) goto bad; rc = symtab_insert(s, key, levdatum); if (rc) goto bad; return 0; bad: sens_destroy(key, levdatum, NULL); return rc; } static int cat_read(struct policydb *p, struct symtab *s, void *fp) { char *key = NULL; struct cat_datum *catdatum; int rc; __le32 buf[3]; u32 len; catdatum = kzalloc(sizeof(*catdatum), GFP_ATOMIC); if (!catdatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) goto bad; len = le32_to_cpu(buf[0]); catdatum->value = le32_to_cpu(buf[1]); catdatum->isalias = le32_to_cpu(buf[2]); rc = str_read(&key, GFP_ATOMIC, fp, len); if (rc) goto bad; rc = symtab_insert(s, key, catdatum); if (rc) goto bad; return 0; bad: cat_destroy(key, catdatum, NULL); return rc; } static int (*read_f[SYM_NUM]) (struct policydb *p, struct symtab *s, void *fp) = { common_read, class_read, role_read, type_read, user_read, cond_read_bool, sens_read, cat_read, }; static int user_bounds_sanity_check(void *key, void *datum, void *datap) { struct user_datum *upper, *user; struct policydb *p = datap; int depth = 0; upper = user = datum; while (upper->bounds) { struct ebitmap_node *node; unsigned long bit; if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { pr_err("SELinux: user %s: " "too deep or looped boundary", (char *) key); return -EINVAL; } upper = p->user_val_to_struct[upper->bounds - 1]; ebitmap_for_each_positive_bit(&user->roles, node, bit) { if (ebitmap_get_bit(&upper->roles, bit)) continue; pr_err("SELinux: boundary violated policy: " "user=%s role=%s bounds=%s\n", sym_name(p, SYM_USERS, user->value - 1), sym_name(p, SYM_ROLES, bit), sym_name(p, SYM_USERS, upper->value - 1)); return -EINVAL; } } return 0; } static int role_bounds_sanity_check(void *key, void *datum, void *datap) { struct role_datum *upper, *role; struct policydb *p = datap; int depth = 0; upper = role = datum; while (upper->bounds) { struct ebitmap_node *node; unsigned long bit; if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { pr_err("SELinux: role %s: " "too deep or looped bounds\n", (char *) key); return -EINVAL; } upper = p->role_val_to_struct[upper->bounds - 1]; ebitmap_for_each_positive_bit(&role->types, node, bit) { if (ebitmap_get_bit(&upper->types, bit)) continue; pr_err("SELinux: boundary violated policy: " "role=%s type=%s bounds=%s\n", sym_name(p, SYM_ROLES, role->value - 1), sym_name(p, SYM_TYPES, bit), sym_name(p, SYM_ROLES, upper->value - 1)); return -EINVAL; } } return 0; } static int type_bounds_sanity_check(void *key, void *datum, void *datap) { struct type_datum *upper; struct policydb *p = datap; int depth = 0; upper = datum; while (upper->bounds) { if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { pr_err("SELinux: type %s: " "too deep or looped boundary\n", (char *) key); return -EINVAL; } upper = p->type_val_to_struct[upper->bounds - 1]; BUG_ON(!upper); if (upper->attribute) { pr_err("SELinux: type %s: " "bounded by attribute %s", (char *) key, sym_name(p, SYM_TYPES, upper->value - 1)); return -EINVAL; } } return 0; } static int policydb_bounds_sanity_check(struct policydb *p) { int rc; if (p->policyvers < POLICYDB_VERSION_BOUNDARY) return 0; rc = hashtab_map(&p->p_users.table, user_bounds_sanity_check, p); if (rc) return rc; rc = hashtab_map(&p->p_roles.table, role_bounds_sanity_check, p); if (rc) return rc; rc = hashtab_map(&p->p_types.table, type_bounds_sanity_check, p); if (rc) return rc; return 0; } u16 string_to_security_class(struct policydb *p, const char *name) { struct class_datum *cladatum; cladatum = symtab_search(&p->p_classes, name); if (!cladatum) return 0; return cladatum->value; } u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name) { struct class_datum *cladatum; struct perm_datum *perdatum = NULL; struct common_datum *comdatum; if (!tclass || tclass > p->p_classes.nprim) return 0; cladatum = p->class_val_to_struct[tclass-1]; comdatum = cladatum->comdatum; if (comdatum) perdatum = symtab_search(&comdatum->permissions, name); if (!perdatum) perdatum = symtab_search(&cladatum->permissions, name); if (!perdatum) return 0; return 1U << (perdatum->value-1); } static int range_read(struct policydb *p, void *fp) { struct range_trans *rt = NULL; struct mls_range *r = NULL; int i, rc; __le32 buf[2]; u32 nel; if (p->policyvers < POLICYDB_VERSION_MLS) return 0; rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; nel = le32_to_cpu(buf[0]); rc = hashtab_init(&p->range_tr, nel); if (rc) return rc; for (i = 0; i < nel; i++) { rc = -ENOMEM; rt = kzalloc(sizeof(*rt), GFP_KERNEL); if (!rt) goto out; rc = next_entry(buf, fp, (sizeof(u32) * 2)); if (rc) goto out; rt->source_type = le32_to_cpu(buf[0]); rt->target_type = le32_to_cpu(buf[1]); if (p->policyvers >= POLICYDB_VERSION_RANGETRANS) { rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; rt->target_class = le32_to_cpu(buf[0]); } else rt->target_class = p->process_class; rc = -EINVAL; if (!policydb_type_isvalid(p, rt->source_type) || !policydb_type_isvalid(p, rt->target_type) || !policydb_class_isvalid(p, rt->target_class)) goto out; rc = -ENOMEM; r = kzalloc(sizeof(*r), GFP_KERNEL); if (!r) goto out; rc = mls_read_range_helper(r, fp); if (rc) goto out; rc = -EINVAL; if (!mls_range_isvalid(p, r)) { pr_warn("SELinux: rangetrans: invalid range\n"); goto out; } rc = hashtab_insert(&p->range_tr, rt, r, rangetr_key_params); if (rc) goto out; rt = NULL; r = NULL; } hash_eval(&p->range_tr, "rangetr"); rc = 0; out: kfree(rt); kfree(r); return rc; } static int filename_trans_read_helper_compat(struct policydb *p, void *fp) { struct filename_trans_key key, *ft = NULL; struct filename_trans_datum *last, *datum = NULL; char *name = NULL; u32 len, stype, otype; __le32 buf[4]; int rc; /* length of the path component string */ rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; len = le32_to_cpu(buf[0]); /* path component string */ rc = str_read(&name, GFP_KERNEL, fp, len); if (rc) return rc; rc = next_entry(buf, fp, sizeof(u32) * 4); if (rc) goto out; stype = le32_to_cpu(buf[0]); key.ttype = le32_to_cpu(buf[1]); key.tclass = le32_to_cpu(buf[2]); key.name = name; otype = le32_to_cpu(buf[3]); last = NULL; datum = policydb_filenametr_search(p, &key); while (datum) { if (unlikely(ebitmap_get_bit(&datum->stypes, stype - 1))) { /* conflicting/duplicate rules are ignored */ datum = NULL; goto out; } if (likely(datum->otype == otype)) break; last = datum; datum = datum->next; } if (!datum) { rc = -ENOMEM; datum = kmalloc(sizeof(*datum), GFP_KERNEL); if (!datum) goto out; ebitmap_init(&datum->stypes); datum->otype = otype; datum->next = NULL; if (unlikely(last)) { last->next = datum; } else { rc = -ENOMEM; ft = kmemdup(&key, sizeof(key), GFP_KERNEL); if (!ft) goto out; rc = hashtab_insert(&p->filename_trans, ft, datum, filenametr_key_params); if (rc) goto out; name = NULL; rc = ebitmap_set_bit(&p->filename_trans_ttypes, key.ttype, 1); if (rc) return rc; } } kfree(name); return ebitmap_set_bit(&datum->stypes, stype - 1, 1); out: kfree(ft); kfree(name); kfree(datum); return rc; } static int filename_trans_read_helper(struct policydb *p, void *fp) { struct filename_trans_key *ft = NULL; struct filename_trans_datum **dst, *datum, *first = NULL; char *name = NULL; u32 len, ttype, tclass, ndatum, i; __le32 buf[3]; int rc; /* length of the path component string */ rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; len = le32_to_cpu(buf[0]); /* path component string */ rc = str_read(&name, GFP_KERNEL, fp, len); if (rc) return rc; rc = next_entry(buf, fp, sizeof(u32) * 3); if (rc) goto out; ttype = le32_to_cpu(buf[0]); tclass = le32_to_cpu(buf[1]); ndatum = le32_to_cpu(buf[2]); if (ndatum == 0) { pr_err("SELinux: Filename transition key with no datum\n"); rc = -ENOENT; goto out; } dst = &first; for (i = 0; i < ndatum; i++) { rc = -ENOMEM; datum = kmalloc(sizeof(*datum), GFP_KERNEL); if (!datum) goto out; *dst = datum; /* ebitmap_read() will at least init the bitmap */ rc = ebitmap_read(&datum->stypes, fp); if (rc) goto out; rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; datum->otype = le32_to_cpu(buf[0]); datum->next = NULL; dst = &datum->next; } rc = -ENOMEM; ft = kmalloc(sizeof(*ft), GFP_KERNEL); if (!ft) goto out; ft->ttype = ttype; ft->tclass = tclass; ft->name = name; rc = hashtab_insert(&p->filename_trans, ft, first, filenametr_key_params); if (rc == -EEXIST) pr_err("SELinux: Duplicate filename transition key\n"); if (rc) goto out; return ebitmap_set_bit(&p->filename_trans_ttypes, ttype, 1); out: kfree(ft); kfree(name); while (first) { datum = first; first = first->next; ebitmap_destroy(&datum->stypes); kfree(datum); } return rc; } static int filename_trans_read(struct policydb *p, void *fp) { u32 nel; __le32 buf[1]; int rc, i; if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS) return 0; rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; nel = le32_to_cpu(buf[0]); if (p->policyvers < POLICYDB_VERSION_COMP_FTRANS) { p->compat_filename_trans_count = nel; rc = hashtab_init(&p->filename_trans, (1 << 11)); if (rc) return rc; for (i = 0; i < nel; i++) { rc = filename_trans_read_helper_compat(p, fp); if (rc) return rc; } } else { rc = hashtab_init(&p->filename_trans, nel); if (rc) return rc; for (i = 0; i < nel; i++) { rc = filename_trans_read_helper(p, fp); if (rc) return rc; } } hash_eval(&p->filename_trans, "filenametr"); return 0; } static int genfs_read(struct policydb *p, void *fp) { int i, j, rc; u32 nel, nel2, len, len2; __le32 buf[1]; struct ocontext *l, *c; struct ocontext *newc = NULL; struct genfs *genfs_p, *genfs; struct genfs *newgenfs = NULL; rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; nel = le32_to_cpu(buf[0]); for (i = 0; i < nel; i++) { rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; len = le32_to_cpu(buf[0]); rc = -ENOMEM; newgenfs = kzalloc(sizeof(*newgenfs), GFP_KERNEL); if (!newgenfs) goto out; rc = str_read(&newgenfs->fstype, GFP_KERNEL, fp, len); if (rc) goto out; for (genfs_p = NULL, genfs = p->genfs; genfs; genfs_p = genfs, genfs = genfs->next) { rc = -EINVAL; if (strcmp(newgenfs->fstype, genfs->fstype) == 0) { pr_err("SELinux: dup genfs fstype %s\n", newgenfs->fstype); goto out; } if (strcmp(newgenfs->fstype, genfs->fstype) < 0) break; } newgenfs->next = genfs; if (genfs_p) genfs_p->next = newgenfs; else p->genfs = newgenfs; genfs = newgenfs; newgenfs = NULL; rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; nel2 = le32_to_cpu(buf[0]); for (j = 0; j < nel2; j++) { rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; len = le32_to_cpu(buf[0]); rc = -ENOMEM; newc = kzalloc(sizeof(*newc), GFP_KERNEL); if (!newc) goto out; rc = str_read(&newc->u.name, GFP_KERNEL, fp, len); if (rc) goto out; rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; newc->v.sclass = le32_to_cpu(buf[0]); rc = context_read_and_validate(&newc->context[0], p, fp); if (rc) goto out; for (l = NULL, c = genfs->head; c; l = c, c = c->next) { rc = -EINVAL; if (!strcmp(newc->u.name, c->u.name) && (!c->v.sclass || !newc->v.sclass || newc->v.sclass == c->v.sclass)) { pr_err("SELinux: dup genfs entry (%s,%s)\n", genfs->fstype, c->u.name); goto out; } len = strlen(newc->u.name); len2 = strlen(c->u.name); if (len > len2) break; } newc->next = c; if (l) l->next = newc; else genfs->head = newc; newc = NULL; } } rc = 0; out: if (newgenfs) { kfree(newgenfs->fstype); kfree(newgenfs); } ocontext_destroy(newc, OCON_FSUSE); return rc; } static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, void *fp) { int i, j, rc; u32 nel, len; __be64 prefixbuf[1]; __le32 buf[3]; struct ocontext *l, *c; u32 nodebuf[8]; for (i = 0; i < info->ocon_num; i++) { rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; nel = le32_to_cpu(buf[0]); l = NULL; for (j = 0; j < nel; j++) { rc = -ENOMEM; c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) goto out; if (l) l->next = c; else p->ocontexts[i] = c; l = c; switch (i) { case OCON_ISID: rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; c->sid[0] = le32_to_cpu(buf[0]); rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; case OCON_FS: case OCON_NETIF: rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto out; len = le32_to_cpu(buf[0]); rc = str_read(&c->u.name, GFP_KERNEL, fp, len); if (rc) goto out; rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; rc = context_read_and_validate(&c->context[1], p, fp); if (rc) goto out; break; case OCON_PORT: rc = next_entry(buf, fp, sizeof(u32)*3); if (rc) goto out; c->u.port.protocol = le32_to_cpu(buf[0]); c->u.port.low_port = le32_to_cpu(buf[1]); c->u.port.high_port = le32_to_cpu(buf[2]); rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; case OCON_NODE: rc = next_entry(nodebuf, fp, sizeof(u32) * 2); if (rc) goto out; c->u.node.addr = nodebuf[0]; /* network order */ c->u.node.mask = nodebuf[1]; /* network order */ rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; case OCON_FSUSE: rc = next_entry(buf, fp, sizeof(u32)*2); if (rc) goto out; rc = -EINVAL; c->v.behavior = le32_to_cpu(buf[0]); /* Determined at runtime, not in policy DB. */ if (c->v.behavior == SECURITY_FS_USE_MNTPOINT) goto out; if (c->v.behavior > SECURITY_FS_USE_MAX) goto out; len = le32_to_cpu(buf[1]); rc = str_read(&c->u.name, GFP_KERNEL, fp, len); if (rc) goto out; rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; case OCON_NODE6: { int k; rc = next_entry(nodebuf, fp, sizeof(u32) * 8); if (rc) goto out; for (k = 0; k < 4; k++) c->u.node6.addr[k] = nodebuf[k]; for (k = 0; k < 4; k++) c->u.node6.mask[k] = nodebuf[k+4]; rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; } case OCON_IBPKEY: { u32 pkey_lo, pkey_hi; rc = next_entry(prefixbuf, fp, sizeof(u64)); if (rc) goto out; /* we need to have subnet_prefix in CPU order */ c->u.ibpkey.subnet_prefix = be64_to_cpu(prefixbuf[0]); rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) goto out; pkey_lo = le32_to_cpu(buf[0]); pkey_hi = le32_to_cpu(buf[1]); if (pkey_lo > U16_MAX || pkey_hi > U16_MAX) { rc = -EINVAL; goto out; } c->u.ibpkey.low_pkey = pkey_lo; c->u.ibpkey.high_pkey = pkey_hi; rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; } case OCON_IBENDPORT: { u32 port; rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) goto out; len = le32_to_cpu(buf[0]); rc = str_read(&c->u.ibendport.dev_name, GFP_KERNEL, fp, len); if (rc) goto out; port = le32_to_cpu(buf[1]); if (port > U8_MAX || port == 0) { rc = -EINVAL; goto out; } c->u.ibendport.port = port; rc = context_read_and_validate(&c->context[0], p, fp); if (rc) goto out; break; } /* end case */ } /* end switch */ } } rc = 0; out: return rc; } /* * Read the configuration data from a policy database binary * representation file into a policy database structure. */ int policydb_read(struct policydb *p, void *fp) { struct role_allow *ra, *lra; struct role_trans_key *rtk = NULL; struct role_trans_datum *rtd = NULL; int i, j, rc; __le32 buf[4]; u32 len, nprim, nel, perm; char *policydb_str; struct policydb_compat_info *info; policydb_init(p); /* Read the magic number and string length. */ rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) goto bad; rc = -EINVAL; if (le32_to_cpu(buf[0]) != POLICYDB_MAGIC) { pr_err("SELinux: policydb magic number 0x%x does " "not match expected magic number 0x%x\n", le32_to_cpu(buf[0]), POLICYDB_MAGIC); goto bad; } rc = -EINVAL; len = le32_to_cpu(buf[1]); if (len != strlen(POLICYDB_STRING)) { pr_err("SELinux: policydb string length %d does not " "match expected length %zu\n", len, strlen(POLICYDB_STRING)); goto bad; } rc = -ENOMEM; policydb_str = kmalloc(len + 1, GFP_KERNEL); if (!policydb_str) { pr_err("SELinux: unable to allocate memory for policydb " "string of length %d\n", len); goto bad; } rc = next_entry(policydb_str, fp, len); if (rc) { pr_err("SELinux: truncated policydb string identifier\n"); kfree(policydb_str); goto bad; } rc = -EINVAL; policydb_str[len] = '\0'; if (strcmp(policydb_str, POLICYDB_STRING)) { pr_err("SELinux: policydb string %s does not match " "my string %s\n", policydb_str, POLICYDB_STRING); kfree(policydb_str); goto bad; } /* Done with policydb_str. */ kfree(policydb_str); policydb_str = NULL; /* Read the version and table sizes. */ rc = next_entry(buf, fp, sizeof(u32)*4); if (rc) goto bad; rc = -EINVAL; p->policyvers = le32_to_cpu(buf[0]); if (p->policyvers < POLICYDB_VERSION_MIN || p->policyvers > POLICYDB_VERSION_MAX) { pr_err("SELinux: policydb version %d does not match " "my version range %d-%d\n", le32_to_cpu(buf[0]), POLICYDB_VERSION_MIN, POLICYDB_VERSION_MAX); goto bad; } if ((le32_to_cpu(buf[1]) & POLICYDB_CONFIG_MLS)) { p->mls_enabled = 1; rc = -EINVAL; if (p->policyvers < POLICYDB_VERSION_MLS) { pr_err("SELinux: security policydb version %d " "(MLS) not backwards compatible\n", p->policyvers); goto bad; } } p->reject_unknown = !!(le32_to_cpu(buf[1]) & REJECT_UNKNOWN); p->allow_unknown = !!(le32_to_cpu(buf[1]) & ALLOW_UNKNOWN); if (p->policyvers >= POLICYDB_VERSION_POLCAP) { rc = ebitmap_read(&p->policycaps, fp); if (rc) goto bad; } if (p->policyvers >= POLICYDB_VERSION_PERMISSIVE) { rc = ebitmap_read(&p->permissive_map, fp); if (rc) goto bad; } rc = -EINVAL; info = policydb_lookup_compat(p->policyvers); if (!info) { pr_err("SELinux: unable to find policy compat info " "for version %d\n", p->policyvers); goto bad; } rc = -EINVAL; if (le32_to_cpu(buf[2]) != info->sym_num || le32_to_cpu(buf[3]) != info->ocon_num) { pr_err("SELinux: policydb table sizes (%d,%d) do " "not match mine (%d,%d)\n", le32_to_cpu(buf[2]), le32_to_cpu(buf[3]), info->sym_num, info->ocon_num); goto bad; } for (i = 0; i < info->sym_num; i++) { rc = next_entry(buf, fp, sizeof(u32)*2); if (rc) goto bad; nprim = le32_to_cpu(buf[0]); nel = le32_to_cpu(buf[1]); rc = symtab_init(&p->symtab[i], nel); if (rc) goto out; if (i == SYM_ROLES) { rc = roles_init(p); if (rc) goto out; } for (j = 0; j < nel; j++) { rc = read_f[i](p, &p->symtab[i], fp); if (rc) goto bad; } p->symtab[i].nprim = nprim; } rc = -EINVAL; p->process_class = string_to_security_class(p, "process"); if (!p->process_class) { pr_err("SELinux: process class is required, not defined in policy\n"); goto bad; } rc = avtab_read(&p->te_avtab, fp, p); if (rc) goto bad; if (p->policyvers >= POLICYDB_VERSION_BOOL) { rc = cond_read_list(p, fp); if (rc) goto bad; } rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto bad; nel = le32_to_cpu(buf[0]); rc = hashtab_init(&p->role_tr, nel); if (rc) goto bad; for (i = 0; i < nel; i++) { rc = -ENOMEM; rtk = kmalloc(sizeof(*rtk), GFP_KERNEL); if (!rtk) goto bad; rc = -ENOMEM; rtd = kmalloc(sizeof(*rtd), GFP_KERNEL); if (!rtd) goto bad; rc = next_entry(buf, fp, sizeof(u32)*3); if (rc) goto bad; rc = -EINVAL; rtk->role = le32_to_cpu(buf[0]); rtk->type = le32_to_cpu(buf[1]); rtd->new_role = le32_to_cpu(buf[2]); if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) { rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto bad; rtk->tclass = le32_to_cpu(buf[0]); } else rtk->tclass = p->process_class; rc = -EINVAL; if (!policydb_role_isvalid(p, rtk->role) || !policydb_type_isvalid(p, rtk->type) || !policydb_class_isvalid(p, rtk->tclass) || !policydb_role_isvalid(p, rtd->new_role)) goto bad; rc = hashtab_insert(&p->role_tr, rtk, rtd, roletr_key_params); if (rc) goto bad; rtk = NULL; rtd = NULL; } rc = next_entry(buf, fp, sizeof(u32)); if (rc) goto bad; nel = le32_to_cpu(buf[0]); lra = NULL; for (i = 0; i < nel; i++) { rc = -ENOMEM; ra = kzalloc(sizeof(*ra), GFP_KERNEL); if (!ra)