1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_DESC_H #define _ASM_X86_DESC_H #include <asm/desc_defs.h> #include <asm/ldt.h> #include <asm/mmu.h> #include <asm/fixmap.h> #include <asm/irq_vectors.h> #include <asm/cpu_entry_area.h> #include <linux/smp.h> #include <linux/percpu.h> static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) { desc->limit0 = info->limit & 0x0ffff; desc->base0 = (info->base_addr & 0x0000ffff); desc->base1 = (info->base_addr & 0x00ff0000) >> 16; desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; /* Set the ACCESS bit so it can be mapped RO */ desc->type |= 1; desc->s = 1; desc->dpl = 0x3; desc->p = info->seg_not_present ^ 1; desc->limit1 = (info->limit & 0xf0000) >> 16; desc->avl = info->useable; desc->d = info->seg_32bit; desc->g = info->limit_in_pages; desc->base2 = (info->base_addr & 0xff000000) >> 24; /* * Don't allow setting of the lm bit. It would confuse * user_64bit_mode and would get overridden by sysret anyway. */ desc->l = 0; } struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); /* Provide the original GDT */ static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu) { return per_cpu(gdt_page, cpu).gdt; } /* Provide the current original GDT */ static inline struct desc_struct *get_current_gdt_rw(void) { return this_cpu_ptr(&gdt_page)->gdt; } /* Provide the fixmap address of the remapped GDT */ static inline struct desc_struct *get_cpu_gdt_ro(int cpu) { return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; } /* Provide the current read-only GDT */ static inline struct desc_struct *get_current_gdt_ro(void) { return get_cpu_gdt_ro(smp_processor_id()); } /* Provide the physical address of the GDT page. */ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu) { return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu)); } static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) { gate->offset_low = (u16) func; gate->bits.p = 1; gate->bits.dpl = dpl; gate->bits.zero = 0; gate->bits.type = type; gate->offset_middle = (u16) (func >> 16); #ifdef CONFIG_X86_64 gate->segment = __KERNEL_CS; gate->bits.ist = ist; gate->reserved = 0; gate->offset_high = (u32) (func >> 32); #else gate->segment = seg; gate->bits.ist = 0; #endif } static inline int desc_empty(const void *ptr) { const u32 *desc = ptr; return !(desc[0] | desc[1]); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define load_TR_desc() native_load_tr_desc() #define load_gdt(dtr) native_load_gdt(dtr) #define load_idt(dtr) native_load_idt(dtr) #define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) #define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) #define store_gdt(dtr) native_store_gdt(dtr) #define store_tr(tr) (tr = native_store_tr()) #define load_TLS(t, cpu) native_load_tls(t, cpu) #define set_ldt native_set_ldt #define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) #define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) #define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { } #endif /* CONFIG_PARAVIRT_XXL */ #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { memcpy(&idt[entry], gate, sizeof(*gate)); } static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) { memcpy(&ldt[entry], desc, 8); } static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type) { unsigned int size; switch (type) { case DESC_TSS: size = sizeof(tss_desc); break; case DESC_LDT: size = sizeof(ldt_desc); break; default: size = sizeof(*gdt); break; } memcpy(&gdt[entry], desc, size); } static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { struct ldttss_desc *desc = d; memset(desc, 0, sizeof(*desc)); desc->limit0 = (u16) size; desc->base0 = (u16) addr; desc->base1 = (addr >> 16) & 0xFF; desc->type = type; desc->p = 1; desc->limit1 = (size >> 16) & 0xF; desc->base2 = (addr >> 24) & 0xFF; #ifdef CONFIG_X86_64 desc->base3 = (u32) (addr >> 32); #endif } static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) { struct desc_struct *d = get_cpu_gdt_rw(cpu); tss_desc tss; set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, __KERNEL_TSS_LIMIT); write_gdt_entry(d, entry, &tss, DESC_TSS); } #define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) static inline void native_set_ldt(const void *addr, unsigned int entries) { if (likely(entries == 0)) asm volatile("lldt %w0"::"q" (0)); else { unsigned cpu = smp_processor_id(); ldt_desc ldt; set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT, entries * LDT_ENTRY_SIZE - 1); write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT, &ldt, DESC_LDT); asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); } } static inline void native_load_gdt(const struct desc_ptr *dtr) { asm volatile("lgdt %0"::"m" (*dtr)); } static __always_inline void native_load_idt(const struct desc_ptr *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } static inline void native_store_gdt(struct desc_ptr *dtr) { asm volatile("sgdt %0":"=m" (*dtr)); } static inline void store_idt(struct desc_ptr *dtr) { asm volatile("sidt %0":"=m" (*dtr)); } /* * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is * a read-only remapping. To prevent a page fault, the GDT is switched to the * original writeable version when needed. */ #ifdef CONFIG_X86_64 static inline void native_load_tr_desc(void) { struct desc_ptr gdt; int cpu = raw_smp_processor_id(); bool restore = 0; struct desc_struct *fixmap_gdt; native_store_gdt(&gdt); fixmap_gdt = get_cpu_gdt_ro(cpu); /* * If the current GDT is the read-only fixmap, swap to the original * writeable version. Swap back at the end. */ if (gdt.address == (unsigned long)fixmap_gdt) { load_direct_gdt(cpu); restore = 1; } asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); if (restore) load_fixmap_gdt(cpu); } #else static inline void native_load_tr_desc(void) { asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } #endif static inline unsigned long native_store_tr(void) { unsigned long tr; asm volatile("str %0":"=r" (tr)); return tr; } static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) { struct desc_struct *gdt = get_cpu_gdt_rw(cpu); unsigned int i; for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } DECLARE_PER_CPU(bool, __tss_limit_invalid); static inline void force_reload_TR(void) { struct desc_struct *d = get_current_gdt_rw(); tss_desc tss; memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc)); /* * LTR requires an available TSS, and the TSS is currently * busy. Make it be available so that LTR will work. */ tss.type = DESC_TSS; write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); load_TR_desc(); this_cpu_write(__tss_limit_invalid, false); } /* * Call this if you need the TSS limit to be correct, which should be the case * if and only if you have TIF_IO_BITMAP set or you're switching to a task * with TIF_IO_BITMAP set. */ static inline void refresh_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); if (unlikely(this_cpu_read(__tss_limit_invalid))) force_reload_TR(); } /* * If you do something evil that corrupts the cached TSS limit (I'm looking * at you, VMX exits), call this function. * * The optimization here is that the TSS limit only matters for Linux if the * IO bitmap is in use. If the TSS limit gets forced to its minimum value, * everything works except that IO bitmap will be ignored and all CPL 3 IO * instructions will #GP, which is exactly what we want for normal tasks. */ static inline void invalidate_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); if (unlikely(test_thread_flag(TIF_IO_BITMAP))) force_reload_TR(); else this_cpu_write(__tss_limit_invalid, true); } /* This intentionally ignores lm, since 32-bit apps don't have that field. */ #define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ (info)->read_exec_only == 1 && \ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ (info)->seg_not_present == 1 && \ (info)->useable == 0) /* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ static inline bool LDT_zero(const struct user_desc *info) { return (info->base_addr == 0 && info->limit == 0 && info->contents == 0 && info->read_exec_only == 0 && info->seg_32bit == 0 && info->limit_in_pages == 0 && info->seg_not_present == 0 && info->useable == 0); } static inline void clear_LDT(void) { set_ldt(NULL, 0); } static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); } static inline void set_desc_base(struct desc_struct *desc, unsigned long base) { desc->base0 = base & 0xffff; desc->base1 = (base >> 16) & 0xff; desc->base2 = (base >> 24) & 0xff; } static inline unsigned long get_desc_limit(const struct desc_struct *desc) { return desc->limit0 | (desc->limit1 << 16); } static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) { desc->limit0 = limit & 0xffff; desc->limit1 = (limit >> 16) & 0xf; } void alloc_intr_gate(unsigned int n, const void *addr); static inline void init_idt_data(struct idt_data *data, unsigned int n, const void *addr) { BUG_ON(n > 0xFF); memset(data, 0, sizeof(*data)); data->vector = n; data->addr = addr; data->segment = __KERNEL_CS; data->bits.type = GATE_INTERRUPT; data->bits.p = 1; } static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) { unsigned long addr = (unsigned long) d->addr; gate->offset_low = (u16) addr; gate->segment = (u16) d->segment; gate->bits = d->bits; gate->offset_middle = (u16) (addr >> 16); #ifdef CONFIG_X86_64 gate->offset_high = (u32) (addr >> 32); gate->reserved = 0; #endif } extern unsigned long system_vectors[]; extern void load_current_idt(void); extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); extern void idt_setup_ist_traps(void); #else static inline void idt_setup_early_pf(void) { } static inline void idt_setup_ist_traps(void) { } #endif extern void idt_invalidate(void *addr); #endif /* _ASM_X86_DESC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H #include <linux/genhd.h> struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; local_t in_flight[2]; }; /* * Macros to operate on percpu disk statistics: * * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should * be called between disk_stat_lock() and disk_stat_unlock(). * * part_stat_read() can be called at any time. */ #define part_stat_lock() preempt_disable() #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ (per_cpu_ptr((part)->dkstats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) static inline void part_stat_set_all(struct hd_struct *part, int value) { int i; for_each_possible_cpu(i) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ __this_cpu_add((part)->dkstats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ __part_stat_add(&part_to_disk((part))->part0, \ field, addnd); \ } while (0) #define part_stat_dec(gendiskp, field) \ part_stat_add(gendiskp, field, -1) #define part_stat_inc(gendiskp, field) \ part_stat_add(gendiskp, field, 1) #define part_stat_sub(gendiskp, field, subnd) \ part_stat_add(gendiskp, field, -subnd) #define part_stat_local_dec(gendiskp, field) \ local_dec(&(part_stat_get(gendiskp, field))) #define part_stat_local_inc(gendiskp, field) \ local_inc(&(part_stat_get(gendiskp, field))) #define part_stat_local_read(gendiskp, field) \ local_read(&(part_stat_get(gendiskp, field))) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) #endif /* _LINUX_PART_STAT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Fast and scalable bitmaps. * * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #ifndef __LINUX_SCALE_BITMAP_H #define __LINUX_SCALE_BITMAP_H #include <linux/kernel.h> #include <linux/slab.h> struct seq_file; /** * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { /** * @depth: Number of bits being used in @word/@cleared */ unsigned long depth; /** * @word: word holding free bits */ unsigned long word ____cacheline_aligned_in_smp; /** * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; /** * @swap_lock: Held while swapping word <-> cleared */ spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** * struct sbitmap - Scalable bitmap. * * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This * trades off higher memory usage for better scalability. */ struct sbitmap { /** * @depth: Number of bits used in the whole bitmap. */ unsigned int depth; /** * @shift: log2(number of bits used per word) */ unsigned int shift; /** * @map_nr: Number of words (cachelines) being used for the bitmap. */ unsigned int map_nr; /** * @map: Allocated bitmap. */ struct sbitmap_word *map; }; #define SBQ_WAIT_QUEUES 8 #define SBQ_WAKE_BATCH 8 /** * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. */ struct sbq_wait_state { /** * @wait_cnt: Number of frees remaining before we wake up. */ atomic_t wait_cnt; /** * @wait: Wait queue. */ wait_queue_head_t wait; } ____cacheline_aligned_in_smp; /** * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free * bits. * * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to * avoid contention on the wait queue spinlock. This ensures that we don't hit a * scalability wall when we run out of free bits and have to start putting tasks * to sleep. */ struct sbitmap_queue { /** * @sb: Scalable bitmap. */ struct sbitmap sb; /* * @alloc_hint: Cache of last successfully allocated or freed bit. * * This is per-cpu, which allows multiple users to stick to different * cachelines until the map is exhausted. */ unsigned int __percpu *alloc_hint; /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. */ unsigned int wake_batch; /** * @wake_index: Next wait queue in @ws to wake up. */ atomic_t wake_index; /** * @ws: Wait queues. */ struct sbq_wait_state *ws; /* * @ws_active: count of currently active ws waitqueues */ atomic_t ws_active; /** * @round_robin: Allocate bits in strict round-robin order. */ bool round_robin; /** * @min_shallow_depth: The minimum shallow depth which may be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). */ unsigned int min_shallow_depth; }; /** * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. * @sb: Bitmap to initialize. * @depth: Number of bits to allocate. * @shift: Use 2^@shift bits per word in the bitmap; if a negative number if * given, a good default is chosen. * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node); /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. */ static inline void sbitmap_free(struct sbitmap *sb) { kfree(sb->map); sb->map = NULL; } /** * sbitmap_resize() - Resize a &struct sbitmap. * @sb: Bitmap to resize. * @depth: New number of bits to resize to. * * Doesn't reallocate anything. It's up to the caller to ensure that the new * depth doesn't exceed the depth that the sb was initialized with. */ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. * @alloc_hint: Hint for where to start searching for a free bit. * @round_robin: If true, be stricter about allocation order; always allocate * starting from the last allocated bit. This is less efficient * than the default behavior (false). * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. * @alloc_hint: Hint for where to start searching for a free bit. * @shallow_depth: The maximum number of bits to allocate from a single word. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * * Return: true if any bit in the bitmap is set, false otherwise. */ bool sbitmap_any_bit_set(const struct sbitmap *sb); #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. * * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ static inline void __sbitmap_for_each_set(struct sbitmap *sb, unsigned int start, sb_for_each_fn fn, void *data) { unsigned int index; unsigned int nr; unsigned int scanned = 0; if (start >= sb->depth) start = 0; index = SB_NR_TO_INDEX(sb, start); nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; word = sb->map[index].word & ~sb->map[index].cleared; if (!word) goto next; /* * On the first iteration of the outer loop, we need to add the * bit offset back to the size of the word for find_next_bit(). * On all other iterations, nr is zero, so this is a noop. */ depth += nr; while (1) { nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } next: nr = 0; if (++index >= sb->map_nr) index = 0; } } /** * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. */ static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, void *data) { __sbitmap_for_each_set(sb, 0, fn, data); } static inline unsigned long *__sbitmap_word(struct sbitmap *sb, unsigned int bitnr) { return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; } /* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) { set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) { clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /* * This one is special, since it doesn't actually clear the bit, rather it * sets the corresponding bit in the ->cleared mask instead. Paired with * the caller doing sbitmap_deferred_clear() if a given index is full, which * will clear the previously freed entries in the corresponding ->word. */ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) { unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct * seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The output isn't guaranteed to be internally * consistent. */ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. * * @sbq: Bitmap queue to free. */ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); } /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. * @depth: New number of bits to resize to. * * Like sbitmap_resize(), this doesn't reallocate anything. It has to do * some extra work on the &struct sbitmap_queue, so it's not safe to just * resize the underlying &struct sbitmap. */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); /** * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } /** * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int *cpu, unsigned int shallow_depth) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); put_cpu(); return nr; } /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. * @sbq: Bitmap queue in question. * @min_shallow_depth: The minimum shallow depth that will be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). * * sbitmap_queue_clear() batches wakeups as an optimization. The batch size * depends on the depth of the bitmap. Since the shallow allocation functions * effectively operate with a different depth, the shallow depth must be taken * into account when calculating the batch size. This function must be called * with the minimum shallow depth that will be used. Failure to do so can result * in missed wakeups. */ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth); /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. * @cpu: CPU the bit was allocated on. */ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); } static inline void sbq_index_atomic_inc(atomic_t *index) { int old = atomic_read(index); int new = sbq_index_inc(old); atomic_cmpxchg(index, old, new); } /** * sbq_wait_ptr() - Get the next wait queue to use for a &struct * sbitmap_queue. * @sbq: Bitmap queue to wait on. * @wait_index: A counter per "user" of @sbq. */ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, atomic_t *wait_index) { struct sbq_wait_state *ws; ws = &sbq->ws[atomic_read(wait_index)]; sbq_index_atomic_inc(wait_index); return ws; } /** * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct * sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); /** * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct * seq_file. * @sbq: Bitmap queue to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); struct sbq_wait { struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ struct wait_queue_entry wait; }; #define DEFINE_SBQ_WAIT(name) \ struct sbq_wait name = { \ .sbq = NULL, \ .wait = { \ .private = current, \ .func = autoremove_wake_function, \ .entry = LIST_HEAD_INIT((name).wait.entry), \ } \ } /* * Wrapper around prepare_to_wait_exclusive(), which maintains some extra * internal state. */ void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state); /* * Must be paired with sbitmap_prepare_to_wait(). */ void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Wrapper around add_wait_queue(), which maintains some extra internal state */ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Must be paired with sbitmap_add_wait_queue() */ void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); #endif /* __LINUX_SCALE_BITMAP_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov * Copyright (C) 2016 Intel, Matthew Wilcox * Copyright (C) 2016 Intel, Ross Zwisler */ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/kmemleak.h> #include <linux/percpu.h> #include <linux/preempt.h> /* in_interrupt() */ #include <linux/radix-tree.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/xarray.h> /* * Radix tree node cache. */ struct kmem_cache *radix_tree_node_cachep; /* * The radix tree is variable-height, so an insert operation not only has * to build the branch to its corresponding item, it also has to build the * branch to existing items if the size has to be increased (by * radix_tree_extend). * * The worst case is a zero height tree with just a single item at index 0, * and then inserting an item at index ULONG_MAX. This requires 2 new branches * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. * Hence: */ #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) /* * The IDR does not have to be as high as the radix tree since it uses * signed integers, not unsigned longs. */ #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) /* * Per-cpu pool of preloaded nodes */ DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { .lock = INIT_LOCAL_LOCK(lock), }; EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads); static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } static inline void *node_to_entry(void *ptr) { return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } #define RADIX_TREE_RETRY XA_RETRY_ENTRY static inline unsigned long get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) { return parent ? slot - parent->slots : 0; } static unsigned int radix_tree_descend(const struct radix_tree_node *parent, struct radix_tree_node **nodep, unsigned long index) { unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); *nodep = (void *)entry; return offset; } static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, int offset) { __set_bit(offset, node->tags[tag]); } static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, int offset) { __clear_bit(offset, node->tags[tag]); } static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, int offset) { return test_bit(offset, node->tags[tag]); } static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) { root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); } static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); } static inline unsigned root_tags_get(const struct radix_tree_root *root) { return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; } static inline bool is_idr(const struct radix_tree_root *root) { return !!(root->xa_flags & ROOT_IS_IDR); } /* * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. */ static inline int any_tag_set(const struct radix_tree_node *node, unsigned int tag) { unsigned idx; for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { if (node->tags[tag][idx]) return 1; } return 0; } static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) { bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); } /** * radix_tree_find_next_bit - find the next set bit in a memory region * * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Unrollable variant of find_next_bit() for constant size arrays. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. * Returns next bit offset, or size if nothing found. */ static __always_inline unsigned long radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, unsigned long offset) { const unsigned long *addr = node->tags[tag]; if (offset < RADIX_TREE_MAP_SIZE) { unsigned long tmp; addr += offset / BITS_PER_LONG; tmp = *addr >> (offset % BITS_PER_LONG); if (tmp) return __ffs(tmp) + offset; offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); while (offset < RADIX_TREE_MAP_SIZE) { tmp = *++addr; if (tmp) return __ffs(tmp) + offset; offset += BITS_PER_LONG; } } return RADIX_TREE_MAP_SIZE; } static unsigned int iter_offset(const struct radix_tree_iter *iter) { return iter->index & RADIX_TREE_MAP_MASK; } /* * The maximum index which can be stored in a radix tree */ static inline unsigned long shift_maxindex(unsigned int shift) { return (RADIX_TREE_MAP_SIZE << shift) - 1; } static inline unsigned long node_maxindex(const struct radix_tree_node *node) { return shift_maxindex(node->shift); } static unsigned long next_index(unsigned long index, const struct radix_tree_node *node, unsigned long offset) { return (index & ~node_maxindex(node)) + (offset << node->shift); } /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. */ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, struct radix_tree_root *root, unsigned int shift, unsigned int offset, unsigned int count, unsigned int nr_values) { struct radix_tree_node *ret = NULL; /* * Preload code isn't irq safe and it doesn't make sense to use * preloading during an interrupt anyway as all the allocations have * to be atomic. So just do normal allocation when in interrupt. */ if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; /* * Even if the caller has preloaded, try to allocate from the * cache first for the new node to get accounted to the memory * cgroup. */ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_NOWARN); if (ret) goto out; /* * Provided the caller has preloaded here, we will always * succeed in getting a node here (and never reach * kmem_cache_alloc) */ rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes; rtp->nodes = ret->parent; rtp->nr--; } /* * Update the allocation stack trace as this is more useful * for debugging. */ kmemleak_update_trace(ret); goto out; } ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); out: BUG_ON(radix_tree_is_internal_node(ret)); if (ret) { ret->shift = shift; ret->offset = offset; ret->count = count; ret->nr_values = nr_values; ret->parent = parent; ret->array = root; } return ret; } void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); /* * Must only free zeroed nodes into the slab. We can be left with * non-NULL entries by radix_tree_free_nodes, so clear the entries * and tags here. */ memset(node->slots, 0, sizeof(node->slots)); memset(node->tags, 0, sizeof(node->tags)); INIT_LIST_HEAD(&node->private_list); kmem_cache_free(radix_tree_node_cachep, node); } static inline void radix_tree_node_free(struct radix_tree_node *node) { call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* * Load up this CPU's radix_tree_node buffer with sufficient objects to * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; int ret = -ENOMEM; /* * Nodes preloaded by one cgroup can be used by another cgroup, so * they should never be accounted to any particular memory cgroup. */ gfp_mask &= ~__GFP_ACCOUNT; local_lock(&radix_tree_preloads.lock); rtp = this_cpu_ptr(&radix_tree_preloads); while (rtp->nr < nr) { local_unlock(&radix_tree_preloads.lock); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; local_lock(&radix_tree_preloads.lock); rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < nr) { node->parent = rtp->nodes; rtp->nodes = node; rtp->nr++; } else { kmem_cache_free(radix_tree_node_cachep, node); } } ret = 0; out: return ret; } /* * Load up this CPU's radix_tree_node buffer with sufficient objects to * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... */ WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); } EXPORT_SYMBOL(radix_tree_preload); /* * The same as above function, except we don't guarantee preloading happens. * We do it, if we decide it helps. On success, return zero with preemption * disabled. On error, return -ENOMEM with preemption not disabled. */ int radix_tree_maybe_preload(gfp_t gfp_mask) { if (gfpflags_allow_blocking(gfp_mask)) return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); /* Preloading doesn't help anything with this gfp mask, skip it */ local_lock(&radix_tree_preloads.lock); return 0; } EXPORT_SYMBOL(radix_tree_maybe_preload); static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); *nodep = node; if (likely(radix_tree_is_internal_node(node))) { node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; } *maxindex = 0; return 0; } /* * Extend a radix tree so it can store key @index. */ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, unsigned long index, unsigned int shift) { void *entry; unsigned int maxshift; int tag; /* Figure out what the shift should be. */ maxshift = shift; while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; entry = rcu_dereference_raw(root->xa_head); if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; do { struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, root, shift, 0, 1, 0); if (!node) return -ENOMEM; if (is_idr(root)) { all_tag_set(node, IDR_FREE); if (!root_tag_get(root, IDR_FREE)) { tag_clear(node, IDR_FREE, 0); root_tag_set(root, IDR_FREE); } } else { /* Propagate the aggregated tag info to the new child */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { if (root_tag_get(root, tag)) tag_set(node, tag, 0); } } BUG_ON(shift > BITS_PER_LONG); if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { /* Moving a value entry root->xa_head to a node */ node->nr_values = 1; } /* * entry was already in the radix tree, so we do not need * rcu_assign_pointer here */ node->slots[0] = (void __rcu *)entry; entry = node_to_entry(node); rcu_assign_pointer(root->xa_head, entry); shift += RADIX_TREE_MAP_SHIFT; } while (shift <= maxshift); out: return maxshift + RADIX_TREE_MAP_SHIFT; } /** * radix_tree_shrink - shrink radix tree to minimum height * @root radix tree root */ static inline bool radix_tree_shrink(struct radix_tree_root *root) { bool shrunk = false; for (;;) { struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); struct radix_tree_node *child; if (!radix_tree_is_internal_node(node)) break; node = entry_to_node(node); /* * The candidate node has more than one child, or its child * is not at the leftmost slot, we cannot shrink. */ if (node->count != 1) break; child = rcu_dereference_raw(node->slots[0]); if (!child) break; /* * For an IDR, we must not shrink entry 0 into the root in * case somebody calls idr_replace() with a pointer that * appears to be an internal entry */ if (!node->shift && is_idr(root)) break; if (radix_tree_is_internal_node(child)) entry_to_node(child)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply * moving the node from one part of the tree to another: if it * was safe to dereference the old pointer to it * (node->slots[0]), it will be safe to dereference the new * one (root->xa_head) as far as dependent read barriers go. */ root->xa_head = (void __rcu *)child; if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) root_tag_clear(root, IDR_FREE); /* * We have a dilemma here. The node's slot[0] must not be * NULLed in case there are concurrent lookups expecting to * find the item. However if this was a bottom-level node, * then it may be subject to the slot pointer being visible * to callers dereferencing it. If item corresponding to * slot[0] is subsequently deleted, these callers would expect * their slot to become empty sooner or later. * * For example, lockless pagecache will look up a slot, deref * the page pointer, and if the page has 0 refcount it means it * was concurrently deleted from pagecache so try the deref * again. Fortunately there is already a requirement for logic * to retry the entire slot lookup -- the indirect pointer * problem (replacing direct root node with an indirect pointer * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ node->count = 0; if (!radix_tree_is_internal_node(child)) { node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; } WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); shrunk = true; } return shrunk; } static bool delete_node(struct radix_tree_root *root, struct radix_tree_node *node) { bool deleted = false; do { struct radix_tree_node *parent; if (node->count) { if (node_to_entry(node) == rcu_dereference_raw(root->xa_head)) deleted |= radix_tree_shrink(root); return deleted; } parent = node->parent; if (parent) { parent->slots[node->offset] = NULL; parent->count--; } else { /* * Shouldn't the tags already have all been cleared * by the caller? */ if (!is_idr(root)) root_tag_clear_all(root); root->xa_head = NULL; } WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); deleted = true; node = parent; } while (node); return deleted; } /** * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key * @nodep: returns node * @slotp: returns slot * * Create, if necessary, and return the node and slot for an item * at position @index in the radix tree @root. * * Until there is more than one item in the tree, no nodes are * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. * * Returns -ENOMEM, or 0 for success. */ static int __radix_tree_create(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index; gfp_t gfp = root_gfp_mask(root); shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ if (max > maxindex) { int error = radix_tree_extend(root, gfp, max, shift); if (error < 0) return error; shift = error; child = rcu_dereference_raw(root->xa_head); } while (shift > 0) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ child = radix_tree_node_alloc(gfp, node, root, shift, offset, 0, 0); if (!child) return -ENOMEM; rcu_assign_pointer(*slot, node_to_entry(child)); if (node) node->count++; } else if (!radix_tree_is_internal_node(child)) break; /* Go a level down */ node = entry_to_node(child); offset = radix_tree_descend(node, &child, index); slot = &node->slots[offset]; } if (nodep) *nodep = node; if (slotp) *slotp = slot; return 0; } /* * Free any nodes below this node. The tree is presumed to not need * shrinking, and any user data in the tree is presumed to not need a * destructor called on it. If we need to add a destructor, we can * add that functionality later. Note that we may not clear tags or * slots from the tree as an RCU walker may still have a pointer into * this subtree. We could replace the entries with RADIX_TREE_RETRY, * but we'll still have to clear those in rcu_free. */ static void radix_tree_free_nodes(struct radix_tree_node *node) { unsigned offset = 0; struct radix_tree_node *child = entry_to_node(node); for (;;) { void *entry = rcu_dereference_raw(child->slots[offset]); if (xa_is_node(entry) && child->shift) { child = entry_to_node(entry); offset = 0; continue; } offset++; while (offset == RADIX_TREE_MAP_SIZE) { struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; WARN_ON_ONCE(!list_empty(&old->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; } } } static inline int insert_entries(struct radix_tree_node *node, void __rcu **slot, void *item, bool replace) { if (*slot) return -EEXIST; rcu_assign_pointer(*slot, item); if (node) { node->count++; if (xa_is_value(item)) node->nr_values++; } return 1; } /** * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key * @item: item to insert * * Insert an item into the radix tree at position @index. */ int radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; void __rcu **slot; int error; BUG_ON(radix_tree_is_internal_node(item)); error = __radix_tree_create(root, index, &node, &slot); if (error) return error; error = insert_entries(node, slot, item, false); if (error < 0) return error; if (node) { unsigned offset = get_slot_offset(node, slot); BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); BUG_ON(tag_get(node, 2, offset)); } else { BUG_ON(root_tags_get(root)); } return 0; } EXPORT_SYMBOL(radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree * @root: radix tree root * @index: index key * @nodep: returns node * @slotp: returns slot * * Lookup and return the item at position @index in the radix * tree @root. * * Until there is more than one item in the tree, no nodes are * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. */ void *__radix_tree_lookup(const struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node, *parent; unsigned long maxindex; void __rcu **slot; restart: parent = NULL; slot = (void __rcu **)&root->xa_head; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); slot = parent->slots + offset; if (node == RADIX_TREE_RETRY) goto restart; if (parent->shift == 0) break; } if (nodep) *nodep = parent; if (slotp) *slotp = slot; return node; } /** * radix_tree_lookup_slot - lookup a slot in a radix tree * @root: radix tree root * @index: index key * * Returns: the slot corresponding to the position @index in the * radix tree @root. This is useful for update-if-exists operations. * * This function can be called under rcu_read_lock iff the slot is not * modified by radix_tree_replace_slot, otherwise it must be called * exclusive from other writers. Any dereference of the slot must be done * using radix_tree_deref_slot. */ void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, unsigned long index) { void __rcu **slot; if (!__radix_tree_lookup(root, index, NULL, &slot)) return NULL; return slot; } EXPORT_SYMBOL(radix_tree_lookup_slot); /** * radix_tree_lookup - perform lookup operation on a radix tree * @root: radix tree root * @index: index key * * Lookup the item at the position @index in the radix tree @root. * * This function can be called under rcu_read_lock, however the caller * must manage lifetimes of leaf nodes (eg. RCU may also be used to free * them safely). No RCU barriers are required to access or modify the * returned item, however. */ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) { return __radix_tree_lookup(root, index, NULL, NULL); } EXPORT_SYMBOL(radix_tree_lookup); static void replace_slot(void __rcu **slot, void *item, struct radix_tree_node *node, int count, int values) { if (node && (count || values)) { node->count += count; node->nr_values += values; } rcu_assign_pointer(*slot, item); } static bool node_tag_get(const struct radix_tree_root *root, const struct radix_tree_node *node, unsigned int tag, unsigned int offset) { if (node) return tag_get(node, tag, offset); return root_tag_get(root, tag); } /* * IDR users want to be able to store NULL in the tree, so if the slot isn't * free, don't adjust the count, even if it's transitioning between NULL and * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still * have empty bits, but it only stores NULL in slots when they're being * deleted. */ static int calculate_count(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot, void *item, void *old) { if (is_idr(root)) { unsigned offset = get_slot_offset(node, slot); bool free = node_tag_get(root, node, IDR_FREE, offset); if (!free) return 0; if (!old) return 1; } return !!item - !!old; } /** * __radix_tree_replace - replace item in a slot * @root: radix tree root * @node: pointer to tree node * @slot: pointer to slot in @node * @item: new item to store in the slot. * * For use with __radix_tree_lookup(). Caller must hold tree write locked * across slot lookup and replacement. */ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot, void *item) { void *old = rcu_dereference_raw(*slot); int values = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* * This function supports replacing value entries and * deleting entries, but that needs accounting against the * node unless the slot is root->xa_head. */ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && (count || values)); replace_slot(slot, item, node, count, values); if (!node) return; delete_node(root, node); } /** * radix_tree_replace_slot - replace item in a slot * @root: radix tree root * @slot: pointer to slot * @item: new item to store in the slot. * * For use with radix_tree_lookup_slot() and * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked * across slot lookup and replacement. * * NOTE: This cannot be used to switch between non-entries (empty slots), * regular entries, and value entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or * deleting, use __radix_tree_lookup() and __radix_tree_replace() or * radix_tree_iter_replace(). */ void radix_tree_replace_slot(struct radix_tree_root *root, void __rcu **slot, void *item) { __radix_tree_replace(root, NULL, slot, item); } EXPORT_SYMBOL(radix_tree_replace_slot); /** * radix_tree_iter_replace - replace item in a slot * @root: radix tree root * @slot: pointer to slot * @item: new item to store in the slot. * * For use with radix_tree_for_each_slot(). * Caller must hold tree write locked. */ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, void __rcu **slot, void *item) { __radix_tree_replace(root, iter->node, slot, item); } static void node_tag_set(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) { while (node) { if (tag_get(node, tag, offset)) return; tag_set(node, tag, offset); offset = node->offset; node = node->parent; } if (!root_tag_get(root, tag)) root_tag_set(root, tag); } /** * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index * * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) * corresponding to @index in the radix tree. From * the root all the way down to the leaf node. * * Returns the address of the tagged item. Setting a tag on a not-present * item is a bug. */ void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); BUG_ON(!node); if (!tag_get(parent, tag, offset)) tag_set(parent, tag, offset); } /* set the root's tag bit */ if (!root_tag_get(root, tag)) root_tag_set(root, tag); return node; } EXPORT_SYMBOL(radix_tree_tag_set); static void node_tag_clear(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) { while (node) { if (!tag_get(node, tag, offset)) return; tag_clear(node, tag, offset); if (any_tag_set(node, tag)) return; offset = node->offset; node = node->parent; } /* clear the root's tag bit */ if (root_tag_get(root, tag)) root_tag_clear(root, tag); } /** * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index * * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) * corresponding to @index in the radix tree. If this causes * the leaf node to have no tags set then clear the tag in the * next-to-leaf node, etc. * * Returns the address of the tagged item on success, else NULL. ie: * has the same return value and semantics as radix_tree_lookup(). */ void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; int offset; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; parent = NULL; while (radix_tree_is_internal_node(node)) { parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); } if (node) node_tag_clear(root, parent, tag, offset); return node; } EXPORT_SYMBOL(radix_tree_tag_clear); /** * radix_tree_iter_tag_clear - clear a tag on the current iterator entry * @root: radix tree root * @iter: iterator state * @tag: tag to clear */ void radix_tree_iter_tag_clear(struct radix_tree_root *root, const struct radix_tree_iter *iter, unsigned int tag) { node_tag_clear(root, iter->node, tag, iter_offset(iter)); } /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index (< RADIX_TREE_MAX_TAGS) * * Return values: * * 0: tag not present or not set * 1: tag set * * Note that the return value of this function may not be relied on, even if * the RCU lock is held, unless tag modification and node deletion are excluded * from concurrency. */ int radix_tree_tag_get(const struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; if (!root_tag_get(root, tag)) return 0; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return 0; while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); if (!tag_get(parent, tag, offset)) return 0; if (node == RADIX_TREE_RETRY) break; } return 1; } EXPORT_SYMBOL(radix_tree_tag_get); /* Construct iter->tags bit-mask from node->tags[tag] array */ static void set_iter_tags(struct radix_tree_iter *iter, struct radix_tree_node *node, unsigned offset, unsigned tag) { unsigned tag_long = offset / BITS_PER_LONG; unsigned tag_bit = offset % BITS_PER_LONG; if (!node) { iter->tags = 1; return; } iter->tags = node->tags[tag][tag_long] >> tag_bit; /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ if (tag_long < RADIX_TREE_TAG_LONGS - 1) { /* Pick tags from next element */ if (tag_bit) iter->tags |= node->tags[tag][tag_long + 1] << (BITS_PER_LONG - tag_bit); /* Clip chunk size, here only BITS_PER_LONG tags */ iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); } } void __rcu **radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter) { slot++; iter->index = __radix_tree_iter_add(iter, 1); iter->next_index = iter->index; iter->tags = 0; return NULL; } EXPORT_SYMBOL(radix_tree_iter_resume); /** * radix_tree_next_chunk - find next chunk of slots for iteration * * @root: radix tree root * @iter: iterator state * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if iteration is over */ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; /* * Catch next_index overflow after ~0UL. iter->index never overflows * during iterating; it can be zero only at the beginning. * And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. * * This condition also used by radix_tree_next_slot() to stop * contiguous iterating, and forbid switching to the next chunk. */ index = iter->next_index; if (!index && iter->index) return NULL; restart: radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; if (!child) return NULL; if (!radix_tree_is_internal_node(child)) { /* Single-slot tree */ iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; iter->node = NULL; return (void __rcu **)&root->xa_head; } do { node = entry_to_node(child); offset = radix_tree_descend(node, &child, index); if ((flags & RADIX_TREE_ITER_TAGGED) ? !tag_get(node, tag, offset) : !child) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; if (flags & RADIX_TREE_ITER_TAGGED) offset = radix_tree_find_next_bit(node, tag, offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); if (slot) break; } index &= ~node_maxindex(node); index += offset << node->shift; /* Overflow after ~0UL */ if (!index) return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; child = rcu_dereference_raw(node->slots[offset]); } if (!child) goto restart; if (child == RADIX_TREE_RETRY) break; } while (node->shift && radix_tree_is_internal_node(child)); /* Update the iterator state */ iter->index = (index &~ node_maxindex(node)) | offset; iter->next_index = (index | node_maxindex(node)) + 1; iter->node = node; if (flags & RADIX_TREE_ITER_TAGGED) set_iter_tags(iter, node, offset, tag); return node->slots + offset; } EXPORT_SYMBOL(radix_tree_next_chunk); /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * * Performs an index-ascending scan of the tree for present items. Places * them at *@results and returns the number of items which were placed at * *@results. * * The implementation is naive. * * Like radix_tree_lookup, radix_tree_gang_lookup may be called under * rcu_read_lock. In this case, rather than the returned results being * an atomic snapshot of the tree at a single point in time, the * semantics of an RCU protected gang lookup are as though multiple * radix_tree_lookups have been issued in individual locks, and results * stored in 'results'. */ unsigned int radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); /** * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree * based on a tag * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @tag: the tag index (< RADIX_TREE_MAX_TAGS) * * Performs an index-ascending scan of the tree for present items which * have the tag indexed by @tag set. Places the items at *@results and * returns the number of items which were placed at *@results. */ unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); /** * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a * radix tree based on a tag * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @tag: the tag index (< RADIX_TREE_MAX_TAGS) * * Performs an index-ascending scan of the tree for present items which * have the tag indexed by @tag set. Places the slots at *@results and * returns the number of slots which were placed at *@results. */ unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { results[ret] = slot; if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); int values = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; if (is_idr(root)) node_tag_set(root, node, IDR_FREE, offset); else for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); replace_slot(slot, NULL, node, -1, values); return node && delete_node(root, node); } /** * radix_tree_iter_delete - delete the entry at this iterator position * @root: radix tree root * @iter: iterator state * @slot: pointer to slot * * Delete the entry at the position currently pointed to by the iterator. * This may result in the current node being freed; if it is, the iterator * is advanced so that it will not reference the freed memory. This * function may be called without any locking if there are no other threads * which can access this tree. */ void radix_tree_iter_delete(struct radix_tree_root *root, struct radix_tree_iter *iter, void __rcu **slot) { if (__radix_tree_delete(root, iter->node, slot)) iter->index = iter->next_index; } EXPORT_SYMBOL(radix_tree_iter_delete); /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root * @index: index key * @item: expected item * * Remove @item at @index from the radix tree rooted at @root. * * Return: the deleted entry, or %NULL if it was not present * or the entry at the given @index was not @item. */ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; void __rcu **slot = NULL; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); if (!slot) return NULL; if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))) return NULL; if (item && entry != item) return NULL; __radix_tree_delete(root, node, slot); return entry; } EXPORT_SYMBOL(radix_tree_delete_item); /** * radix_tree_delete - delete an entry from a radix tree * @root: radix tree root * @index: index key * * Remove the entry at @index from the radix tree rooted at @root. * * Return: The deleted entry, or %NULL if it was not present. */ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { return radix_tree_delete_item(root, index, NULL); } EXPORT_SYMBOL(radix_tree_delete); /** * radix_tree_tagged - test whether any items in the tree are tagged * @root: radix tree root * @tag: tag to test */ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) { return root_tag_get(root, tag); } EXPORT_SYMBOL(radix_tree_tagged); /** * idr_preload - preload for idr_alloc() * @gfp_mask: allocation mask to use for preloading * * Preallocate memory to use for the next call to idr_alloc(). This function * returns with preemption disabled. It will be enabled by idr_preload_end(). */ void idr_preload(gfp_t gfp_mask) { if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) local_lock(&radix_tree_preloads.lock); } EXPORT_SYMBOL(idr_preload); void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex, start = iter->next_index; unsigned int shift, offset = 0; grow: shift = radix_tree_load_root(root, &child, &maxindex); if (!radix_tree_tagged(root, IDR_FREE)) start = max(start, maxindex + 1); if (start > max) return ERR_PTR(-ENOSPC); if (start > maxindex) { int error = radix_tree_extend(root, gfp, start, shift); if (error < 0) return ERR_PTR(error); shift = error; child = rcu_dereference_raw(root->xa_head); } if (start == 0 && shift == 0) shift = RADIX_TREE_MAP_SHIFT; while (shift) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ child = radix_tree_node_alloc(gfp, node, root, shift, offset, 0, 0); if (!child) return ERR_PTR(-ENOMEM); all_tag_set(child, IDR_FREE); rcu_assign_pointer(*slot, node_to_entry(child)); if (node) node->count++; } else if (!radix_tree_is_internal_node(child)) break; node = entry_to_node(child); offset = radix_tree_descend(node, &child, start); if (!tag_get(node, IDR_FREE, offset)) { offset = radix_tree_find_next_bit(node, IDR_FREE, offset + 1); start = next_index(start, node, offset); if (start > max || start == 0) return ERR_PTR(-ENOSPC); while (offset == RADIX_TREE_MAP_SIZE) { offset = node->offset + 1; node = node->parent; if (!node) goto grow; shift = node->shift; } child = rcu_dereference_raw(node->slots[offset]); } slot = &node->slots[offset]; } iter->index = start; if (node) iter->next_index = 1 + min(max, (start | node_maxindex(node))); else iter->next_index = 1; iter->node = node; set_iter_tags(iter, node, offset, IDR_FREE); return slot; } /** * idr_destroy - release all internal memory from an IDR * @idr: idr handle * * After this function is called, the IDR is empty, and may be reused or * the data structure containing it may be freed. * * A typical clean-up sequence for objects stored in an idr tree will use * idr_for_each() to free all objects, if necessary, then idr_destroy() to * free the memory used to keep track of those objects. */ void idr_destroy(struct idr *idr) { struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); if (radix_tree_is_internal_node(node)) radix_tree_free_nodes(node); idr->idr_rt.xa_head = NULL; root_tag_set(&idr->idr_rt, IDR_FREE); } EXPORT_SYMBOL(idr_destroy); static void radix_tree_node_ctor(void *arg) { struct radix_tree_node *node = arg; memset(node, 0, sizeof(*node)); INIT_LIST_HEAD(&node->private_list); } static int radix_tree_cpu_dead(unsigned int cpu) { struct radix_tree_preload *rtp; struct radix_tree_node *node; /* Free per-cpu pool of preloaded nodes */ rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { node = rtp->nodes; rtp->nodes = node->parent; kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } return 0; } void __init radix_tree_init(void) { int ret; BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); BUILD_BUG_ON(XA_CHUNK_SIZE > 255); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", NULL, radix_tree_cpu_dead); WARN_ON(ret < 0); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP module. * * Version: @(#)ip.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * * Changes: * Mike McLagan : Routing by source */ #ifndef _IP_H #define _IP_H #include <linux/types.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/sockptr.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> #include <net/netns/hash.h> #include <net/lwtunnel.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ extern unsigned int sysctl_fib_sync_mem; extern unsigned int sysctl_fib_sync_mem_min; extern unsigned int sysctl_fib_sync_mem_max; struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) #define IPSKB_XFRM_TRANSFORMED BIT(2) #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; static inline bool ipv4_l3mdev_skb(u16 flags) { return !!(flags & IPSKB_L3SLAVE); } static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; } struct ipcm_cookie { struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; __u8 ttl; __s16 tos; char priority; __u16 gso_size; }; static inline void ipcm_init(struct ipcm_cookie *ipcm) { *ipcm = (struct ipcm_cookie) { .tos = -1 }; } static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, const struct inet_sock *inet) { ipcm_init(ipcm); ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = inet->sk.sk_bound_dev_if; ipcm->addr = inet->inet_saddr; } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) /* return enslaved device index if relevant */ static inline int inet_sdif(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return IPCB(skb)->iif; #endif return 0; } /* Special input handler for packets caught by router alert option. They are selected only by protocol field, and then processed likely local ones; but only if someone wants them! Otherwise, router not running rsvpd will kill RSVP. It is user level problem, what it will make with them. I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), but receiver should be enough clever f.e. to forward mtrace requests, sent to multicast group to reach destination designated router. */ struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; union { void (*destructor)(struct sock *); struct sock *saved_sk; }; struct rcu_head rcu; }; /* IP flags. */ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ #define IP_MF 0x2000 /* Flag: "More Fragments" */ #define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ struct msghdr; struct net_device; struct packet_type; struct rtable; struct sockaddr; int igmp_mc_init(void); /* * Functions provided by ip.c */ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos); int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); struct ip_fraglist_iter { struct sk_buff *frag; struct iphdr *iph; int offset; unsigned int hlen; }; void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, unsigned int hlen, struct ip_fraglist_iter *iter); void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter); static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) { struct sk_buff *skb = iter->frag; iter->frag = skb->next; skb_mark_not_on_list(skb); return skb; } struct ip_frag_state { bool DF; unsigned int hlen; unsigned int ll_rs; unsigned int mtu; unsigned int left; int offset; int ptr; __be16 not_last_frag; }; void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state); struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, struct rtable **rt, unsigned int flags); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, int offset, size_t size, int flags); struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); int ip_send_skb(struct net *net, struct sk_buff *skb); int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); void ip_flush_pending_frames(struct sock *sk); struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, struct inet_cork *cork, unsigned int flags); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); } static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) { return (ipc->tos != -1) ? RT_CONN_FLAGS_TOS(sk, ipc->tos) : RT_CONN_FLAGS(sk); } /* datagram.c */ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) { return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len, u64 transmit_time); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define __IP_ADD_STATS(net, field, val) __SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) #define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off); #else static inline u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset) { return snmp_get_cpu_field(mib, cpu, offct); } static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off) { return snmp_fold_field(mib, offt); } #endif #define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff64[i] += snmp_get_cpu_field64( \ mib_statistic, \ c, stats_list[i].entry, \ offset); \ } \ } #define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff[i] += snmp_get_cpu_field( \ mib_statistic, \ c, stats_list[i].entry); \ } \ } void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { if (!net->ipv4.sysctl_local_reserved_ports) return false; return test_bit(port, net->ipv4.sysctl_local_reserved_ports); } static inline bool sysctl_dev_name_is_allowed(const char *name) { return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < net->ipv4.sysctl_ip_prot_sock; } #else static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { return false; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < PROT_SOCK; } #endif __be32 inet_current_timestamp(void); /* From inetpeer.c */ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; } #ifdef CONFIG_INET #include <net/dst.h> /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ static inline int ip_decrease_ttl(struct iphdr *iph) { u32 check = (__force u32)iph->check; check += (__force u32)htons(0x0100); iph->check = (__force __sum16)(check + (check>=0xFFFF)); return --iph->ttl; } static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *)dst; return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); } static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) { return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct net *net = dev_net(dst->dev); unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); if (!mtu) mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { unsigned int mtu; if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { if (fib_metrics != &dst_default_metrics && refcount_dec_and_test(&fib_metrics->refcnt)) kfree(fib_metrics); } /* ipv4 and ipv6 both use refcounted metrics if it is not the default */ static inline void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) { dst_init_metrics(dst, fib_metrics->metrics, true); if (fib_metrics != &dst_default_metrics) { dst->_metrics |= DST_METRICS_REFCOUNTED; refcount_inc(&fib_metrics->refcnt); } } static inline void ip_dst_metrics_put(struct dst_entry *dst) { struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) kfree(p); } u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { /* This is only to work around buggy Windows95/2000 * VJ compression implementations. If the ID field * does not change, they drop every other packet in * a TCP stream using header compression. */ if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); inet_sk(sk)->inet_id += segs; } else { iph->id = 0; } } else { __ip_select_ident(net, iph, segs); } } static inline void ip_select_ident(struct net *net, struct sk_buff *skb, struct sock *sk) { ip_select_ident_segs(net, skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) { return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len, proto, 0); } /* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store * Equivalent to : flow->v4addrs.src = iph->saddr; * flow->v4addrs.dst = iph->daddr; */ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, const struct iphdr *iph) { BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) { const struct iphdr *iph = skb_gro_network_header(skb); return csum_tcpudp_nofold(iph->saddr, iph->daddr, skb_gro_len(skb), proto, 0); } /* * Map a multicast IP onto multicast MAC for type ethernet. */ static inline void ip_eth_mc_map(__be32 naddr, char *buf) { __u32 addr=ntohl(naddr); buf[0]=0x01; buf[1]=0x00; buf[2]=0x5e; buf[5]=addr&0xFF; addr>>=8; buf[4]=addr&0xFF; addr>>=8; buf[3]=addr&0x7F; } /* * Map a multicast IP onto multicast MAC for type IP-over-InfiniBand. * Leave P_Key as 0 to be filled in by driver. */ static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { __u32 addr; unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; addr = ntohl(naddr); buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x40; /* IPv4 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; buf[10] = 0; buf[11] = 0; buf[12] = 0; buf[13] = 0; buf[14] = 0; buf[15] = 0; buf[19] = addr & 0xff; addr >>= 8; buf[18] = addr & 0xff; addr >>= 8; buf[17] = addr & 0xff; addr >>= 8; buf[16] = addr & 0x0f; } static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) memcpy(buf, broadcast, 4); else memcpy(buf, &naddr, sizeof(naddr)); } #if IS_ENABLED(CONFIG_IPV6) #include <linux/ipv6.h> #endif static __inline__ void inet_reset_saddr(struct sock *sk) { inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); memset(&np->saddr, 0, sizeof(np->saddr)); memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); } #endif } #endif static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; } static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } bool ip_call_ra_chain(struct sk_buff *skb); /* * Functions provided by ip_fragment.c */ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHRT_MAX, IP_DEFRAG_CONNTRACK_OUT, __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD, IP_DEFRAG_AF_PACKET, IP_DEFRAG_MACVLAN, }; /* Return true if the value of 'user' is between 'lower_bond' * and 'upper_bond' inclusively. */ static inline bool ip_defrag_user_in_between(u32 user, enum ip_defrag_users lower_bond, enum ip_defrag_users upper_bond) { return user >= lower_bond && user <= upper_bond; } int ip_defrag(struct net *net, struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user); #else static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { return skb; } #endif /* * Functions provided by ip_forward.c */ int ip_forward(struct sk_buff *skb); /* * Functions provided by ip_options.c */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); static inline int ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb) { return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); int __ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, sockptr_t data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); static inline bool inetdev_valid_mtu(unsigned int mtu) { return likely(mtu >= IPV4_MIN_MTU); } void ip_sock_set_freebind(struct sock *sk); int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 #ifndef _LINUX_PSI_H #define _LINUX_PSI_H #include <linux/jump_label.h> #include <linux/psi_types.h> #include <linux/sched.h> #include <linux/poll.h> struct seq_file; struct css_set; #ifdef CONFIG_PSI extern struct static_key_false psi_disabled; extern struct psi_group psi_system; void psi_init(void); void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); #ifdef CONFIG_CGROUPS int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); #endif #else /* CONFIG_PSI */ static inline void psi_init(void) {} static inline void psi_memstall_enter(unsigned long *flags) {} static inline void psi_memstall_leave(unsigned long *flags) {} #ifdef CONFIG_CGROUPS static inline int psi_cgroup_alloc(struct cgroup *cgrp) { return 0; } static inline void psi_cgroup_free(struct cgroup *cgrp) { } static inline void cgroup_move_task(struct task_struct *p, struct css_set *to) { rcu_assign_pointer(p->cgroups, to); } #endif #endif /* CONFIG_PSI */ #endif /* _LINUX_PSI_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 /* SPDX-License-Identifier: GPL-2.0-only */ /* * IEEE802.15.4-2003 specification * * Copyright (C) 2007, 2008 Siemens AG * * Written by: * Pavel Smolenskiy <pavel.smolenskiy@gmail.com> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Maxim Osipov <maxim.osipov@siemens.com> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #ifndef LINUX_IEEE802154_H #define LINUX_IEEE802154_H #include <linux/types.h> #include <linux/random.h> #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 #define IEEE802154_MAX_AUTH_TAG_LEN 16 #define IEEE802154_FC_LEN 2 #define IEEE802154_SEQ_LEN 1 /* General MAC frame format: * 2 bytes: Frame Control * 1 byte: Sequence Number * 20 bytes: Addressing fields * 14 bytes: Auxiliary Security Header */ #define IEEE802154_MAX_HEADER_LEN (2 + 1 + 20 + 14) #define IEEE802154_MIN_HEADER_LEN (IEEE802154_ACK_PSDU_LEN - \ IEEE802154_FCS_LEN) #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 #define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_PAN_ID_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 #define IEEE802154_MAX_SIFS_FRAME_SIZE 18 #define IEEE802154_MAX_CHANNEL 26 #define IEEE802154_MAX_PAGE 31 #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ #define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ #define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ #define IEEE802154_FC_TYPE_SHIFT 0 #define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) #define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) #define IEEE802154_FC_SET_TYPE(v, x) do { \ v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ } while (0) #define IEEE802154_FC_SECEN_SHIFT 3 #define IEEE802154_FC_SECEN (1 << IEEE802154_FC_SECEN_SHIFT) #define IEEE802154_FC_FRPEND_SHIFT 4 #define IEEE802154_FC_FRPEND (1 << IEEE802154_FC_FRPEND_SHIFT) #define IEEE802154_FC_ACK_REQ_SHIFT 5 #define IEEE802154_FC_ACK_REQ (1 << IEEE802154_FC_ACK_REQ_SHIFT) #define IEEE802154_FC_INTRA_PAN_SHIFT 6 #define IEEE802154_FC_INTRA_PAN (1 << IEEE802154_FC_INTRA_PAN_SHIFT) #define IEEE802154_FC_SAMODE_SHIFT 14 #define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) #define IEEE802154_FC_DAMODE_SHIFT 10 #define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) #define IEEE802154_FC_VERSION_SHIFT 12 #define IEEE802154_FC_VERSION_MASK (3 << IEEE802154_FC_VERSION_SHIFT) #define IEEE802154_FC_VERSION(x) ((x & IEEE802154_FC_VERSION_MASK) >> IEEE802154_FC_VERSION_SHIFT) #define IEEE802154_FC_SAMODE(x) \ (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) #define IEEE802154_FC_DAMODE(x) \ (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) #define IEEE802154_SCF_SECLEVEL_MASK 7 #define IEEE802154_SCF_SECLEVEL_SHIFT 0 #define IEEE802154_SCF_SECLEVEL(x) (x & IEEE802154_SCF_SECLEVEL_MASK) #define IEEE802154_SCF_KEY_ID_MODE_SHIFT 3 #define IEEE802154_SCF_KEY_ID_MODE_MASK (3 << IEEE802154_SCF_KEY_ID_MODE_SHIFT) #define IEEE802154_SCF_KEY_ID_MODE(x) \ ((x & IEEE802154_SCF_KEY_ID_MODE_MASK) >> IEEE802154_SCF_KEY_ID_MODE_SHIFT) #define IEEE802154_SCF_KEY_IMPLICIT 0 #define IEEE802154_SCF_KEY_INDEX 1 #define IEEE802154_SCF_KEY_SHORT_INDEX 2 #define IEEE802154_SCF_KEY_HW_INDEX 3 #define IEEE802154_SCF_SECLEVEL_NONE 0 #define IEEE802154_SCF_SECLEVEL_MIC32 1 #define IEEE802154_SCF_SECLEVEL_MIC64 2 #define IEEE802154_SCF_SECLEVEL_MIC128 3 #define IEEE802154_SCF_SECLEVEL_ENC 4 #define IEEE802154_SCF_SECLEVEL_ENC_MIC32 5 #define IEEE802154_SCF_SECLEVEL_ENC_MIC64 6 #define IEEE802154_SCF_SECLEVEL_ENC_MIC128 7 /* MAC footer size */ #define IEEE802154_MFR_SIZE 2 /* 2 octets */ /* MAC's Command Frames Identifiers */ #define IEEE802154_CMD_ASSOCIATION_REQ 0x01 #define IEEE802154_CMD_ASSOCIATION_RESP 0x02 #define IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 #define IEEE802154_CMD_DATA_REQ 0x04 #define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 #define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 #define IEEE802154_CMD_BEACON_REQ 0x07 #define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 #define IEEE802154_CMD_GTS_REQ 0x09 /* * The return values of MAC operations */ enum { /* * The requested operation was completed successfully. * For a transmission request, this value indicates * a successful transmission. */ IEEE802154_SUCCESS = 0x0, /* The beacon was lost following a synchronization request. */ IEEE802154_BEACON_LOSS = 0xe0, /* * A transmission could not take place due to activity on the * channel, i.e., the CSMA-CA mechanism has failed. */ IEEE802154_CHNL_ACCESS_FAIL = 0xe1, /* The GTS request has been denied by the PAN coordinator. */ IEEE802154_DENINED = 0xe2, /* The attempt to disable the transceiver has failed. */ IEEE802154_DISABLE_TRX_FAIL = 0xe3, /* * The received frame induces a failed security check according to * the security suite. */ IEEE802154_FAILED_SECURITY_CHECK = 0xe4, /* * The frame resulting from secure processing has a length that is * greater than aMACMaxFrameSize. */ IEEE802154_FRAME_TOO_LONG = 0xe5, /* * The requested GTS transmission failed because the specified GTS * either did not have a transmit GTS direction or was not defined. */ IEEE802154_INVALID_GTS = 0xe6, /* * A request to purge an MSDU from the transaction queue was made using * an MSDU handle that was not found in the transaction table. */ IEEE802154_INVALID_HANDLE = 0xe7, /* A parameter in the primitive is out of the valid range.*/ IEEE802154_INVALID_PARAMETER = 0xe8, /* No acknowledgment was received after aMaxFrameRetries. */ IEEE802154_NO_ACK = 0xe9, /* A scan operation failed to find any network beacons.*/ IEEE802154_NO_BEACON = 0xea, /* No response data were available following a request. */ IEEE802154_NO_DATA = 0xeb, /* The operation failed because a short address was not allocated. */ IEEE802154_NO_SHORT_ADDRESS = 0xec, /* * A receiver enable request was unsuccessful because it could not be * completed within the CAP. */ IEEE802154_OUT_OF_CAP = 0xed, /* * A PAN identifier conflict has been detected and communicated to the * PAN coordinator. */ IEEE802154_PANID_CONFLICT = 0xee, /* A coordinator realignment command has been received. */ IEEE802154_REALIGMENT = 0xef, /* The transaction has expired and its information discarded. */ IEEE802154_TRANSACTION_EXPIRED = 0xf0, /* There is no capacity to store the transaction. */ IEEE802154_TRANSACTION_OVERFLOW = 0xf1, /* * The transceiver was in the transmitter enabled state when the * receiver was requested to be enabled. */ IEEE802154_TX_ACTIVE = 0xf2, /* The appropriate key is not available in the ACL. */ IEEE802154_UNAVAILABLE_KEY = 0xf3, /* * A SET/GET request was issued with the identifier of a PIB attribute * that is not supported. */ IEEE802154_UNSUPPORTED_ATTR = 0xf4, /* * A request to perform a scan operation failed because the MLME was * in the process of performing a previously initiated scan operation. */ IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FCTL_DADDR 0x0c00 #define IEEE802154_FCTL_SADDR 0xc000 #define IEEE802154_FTYPE_DATA 0x0001 #define IEEE802154_FCTL_ADDR_NONE 0x0000 #define IEEE802154_FCTL_DADDR_SHORT 0x0800 #define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 #define IEEE802154_FCTL_SADDR_SHORT 0x8000 #define IEEE802154_FCTL_SADDR_EXTENDED 0xc000 /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder */ static inline int ieee802154_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE802154_FCTL_FTYPE)) == cpu_to_le16(IEEE802154_FTYPE_DATA); } /** * ieee802154_is_secen - check if Security bit is set * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_secen(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_SECEN); } /** * ieee802154_is_ackreq - check if acknowledgment request bit is set * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_ackreq(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_ACKREQ); } /** * ieee802154_is_intra_pan - check if intra pan id communication * @fc: frame control bytes in little-endian byteorder */ static inline bool ieee802154_is_intra_pan(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); } /* * ieee802154_daddr_mode - get daddr mode from fc * @fc: frame control bytes in little-endian byteorder */ static inline __le16 ieee802154_daddr_mode(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); } /* * ieee802154_saddr_mode - get saddr mode from fc * @fc: frame control bytes in little-endian byteorder */ static inline __le16 ieee802154_saddr_mode(__le16 fc) { return fc & cpu_to_le16(IEEE802154_FCTL_SADDR); } /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: * 0-4 Reserved * 5 MPDU (Acknowledgment) * 6-8 Reserved * 9-127 MPDU * * @len: psdu len with (MHR + payload + MFR) */ static inline bool ieee802154_is_valid_psdu_len(u8 len) { return (len == IEEE802154_ACK_PSDU_LEN || (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** * ieee802154_is_valid_extended_unicast_addr - check if extended addr is valid * @addr: extended addr to check */ static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) { /* Bail out if the address is all zero, or if the group * address bit is set. */ return ((addr != cpu_to_le64(0x0000000000000000ULL)) && !(addr & cpu_to_le64(0x0100000000000000ULL))); } /** * ieee802154_is_broadcast_short_addr - check if short addr is broadcast * @addr: short addr to check */ static inline bool ieee802154_is_broadcast_short_addr(__le16 addr) { return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)); } /** * ieee802154_is_unspec_short_addr - check if short addr is unspecified * @addr: short addr to check */ static inline bool ieee802154_is_unspec_short_addr(__le16 addr) { return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)); } /** * ieee802154_is_valid_src_short_addr - check if source short address is valid * @addr: short addr to check */ static inline bool ieee802154_is_valid_src_short_addr(__le16 addr) { return !(ieee802154_is_broadcast_short_addr(addr) || ieee802154_is_unspec_short_addr(addr)); } /** * ieee802154_random_extended_addr - generates a random extended address * @addr: extended addr pointer to place the random address */ static inline void ieee802154_random_extended_addr(__le64 *addr) { get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); /* clear the group bit, and set the locally administered bit */ ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01; ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02; } #endif /* LINUX_IEEE802154_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 // SPDX-License-Identifier: GPL-2.0-or-later /* * Generic address resolution entity * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Fixes: * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. * Harald Welte Add neighbour cache statistics like rtstat */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/kmemleak.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/times.h> #include <net/net_namespace.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/dst.h> #include <net/sock.h> #include <net/netevent.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> #include <linux/log2.h> #include <linux/inetdevice.h> #include <net/addrconf.h> #include <trace/events/neigh.h> #define DEBUG #define NEIGH_DEBUG 1 #define neigh_dbg(level, fmt, ...) \ do { \ if (level <= NEIGH_DEBUG) \ pr_debug(fmt, ##__VA_ARGS__); \ } while (0) #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(struct timer_list *t); static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, struct net_device *dev); #ifdef CONFIG_PROC_FS static const struct seq_operations neigh_stat_seq_ops; #endif /* Neighbour hash table buckets are protected with rwlock tbl->lock. - All the scans/updates to hash buckets MUST be made under this lock. - NOTHING clever should be made under this lock: no callbacks to protocol backends, no attempts to send something to network. It will result in deadlocks, if backend/driver wants to use neighbour cache. - If the entry requires some non-trivial actions, increase its reference count and release table lock. Neighbour entries are protected: - with reference count. - with rwlock neigh->lock Reference count prevents destruction. neigh->lock mainly serializes ll address data and its validity state. However, the same lock is used to protect another entry fields: - timer - resolution queue Again, nothing clever shall be made under neigh->lock, the most complicated procedure, which we allow is dev->hard_header. It is supposed, that dev->hard_header is simplistic and does not make callbacks to neighbour tables. */ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); return -ENETDOWN; } static void neigh_cleanup_and_release(struct neighbour *neigh) { trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); } /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, * because it is really reasonable choice. */ unsigned long neigh_rand_reach_time(unsigned long base) { return base ? (prandom_u32() % base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time); static void neigh_mark_dead(struct neighbour *n) { n->dead = 1; if (!list_empty(&n->gc_list)) { list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } } static void neigh_update_gc_list(struct neighbour *n) { bool on_gc_list, exempt_from_gc; write_lock_bh(&n->tbl->lock); write_lock(&n->lock); if (n->dead) goto out; /* remove from the gc list if new state is permanent or if neighbor * is externally learned; otherwise entry should be on the gc list */ exempt_from_gc = n->nud_state & NUD_PERMANENT || n->flags & NTF_EXT_LEARNED; on_gc_list = !list_empty(&n->gc_list); if (exempt_from_gc && on_gc_list) { list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } else if (!exempt_from_gc && !on_gc_list) { /* add entries to the tail; cleaning removes from the front */ list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); } out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, int *notify) { bool rc = false; u8 ndm_flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) return rc; ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; rc = true; *notify = 1; } return rc; } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, struct neigh_table *tbl) { bool retval = false; write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { struct neighbour *neigh; neigh = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); rcu_assign_pointer(*np, neigh); neigh_mark_dead(n); retval = true; } write_unlock(&n->lock); if (retval) neigh_cleanup_and_release(n); return retval; } bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) { struct neigh_hash_table *nht; void *pkey = ndel->primary_key; u32 hash_val; struct neighbour *n; struct neighbour __rcu **np; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); hash_val = hash_val >> (32 - nht->hash_shift); np = &nht->hash_buckets[hash_val]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock)))) { if (n == ndel) return neigh_del(n, np, tbl); np = &n->next; } return false; } static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); write_lock_bh(&tbl->lock); list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) { if (refcount_read(&n->refcnt) == 1) { bool remove = false; write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) remove = true; write_unlock(&n->lock); if (remove && neigh_remove_one(n, tbl)) shrunk++; if (shrunk >= max_clean) break; } } tbl->last_flush = jiffies; write_unlock_bh(&tbl->lock); return shrunk; } static void neigh_add_timer(struct neighbour *n, unsigned long when) { neigh_hold(n); if (unlikely(mod_timer(&n->timer, when))) { printk("NEIGH: BUG, double timer add, state is %x\n", n->nud_state); dump_stack(); } } static int neigh_del_timer(struct neighbour *n) { if ((n->nud_state & NUD_IN_TIMER) && del_timer(&n->timer)) { neigh_release(n); return 1; } return 0; } static void pneigh_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = skb_dequeue(list)) != NULL) { dev_put(skb->dev); kfree_skb(skb); } } static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { int i; struct neigh_hash_table *nht; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { if (dev && n->dev != dev) { np = &n->next; continue; } if (skip_perm && n->nud_state & NUD_PERMANENT) { np = &n->next; continue; } rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); write_lock(&n->lock); neigh_del_timer(n); neigh_mark_dead(n); if (refcount_read(&n->refcnt) != 1) { /* The most unpleasant situation. We must destroy neighbour entry, but someone still uses it. The destroy will be delayed until the last user releases us, but we must kill timers etc. and move it to safe state. */ __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); } } } void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, false); write_unlock_bh(&tbl->lock); } EXPORT_SYMBOL(neigh_changeaddr); static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); return 0; } int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev) { __neigh_ifdown(tbl, dev, true); return 0; } EXPORT_SYMBOL(neigh_carrier_down); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { __neigh_ifdown(tbl, dev, false); return 0; } EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; int entries; if (exempt_from_gc) goto do_alloc; entries = atomic_inc_return(&tbl->gc_entries) - 1; if (entries >= tbl->gc_thresh3 || (entries >= tbl->gc_thresh2 && time_after(now, tbl->last_flush + 5 * HZ))) { if (!neigh_forced_gc(tbl) && entries >= tbl->gc_thresh3) { net_info_ratelimited("%s: neighbor table overflow!\n", tbl->id); NEIGH_CACHE_STAT_INC(tbl, table_fulls); goto out_entries; } } do_alloc: n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; refcount_set(&n->refcnt, 1); n->dead = 1; INIT_LIST_HEAD(&n->gc_list); atomic_inc(&tbl->entries); out: return n; out_entries: if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); goto out; } static void neigh_get_hash_rnd(u32 *x) { *x = get_random_u32() | 1; } static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { size_t size = (1 << shift) * sizeof(struct neighbour *); struct neigh_hash_table *ret; struct neighbour __rcu **buckets; int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); return NULL; } ret->hash_buckets = buckets; ret->hash_shift = shift; for (i = 0; i < NEIGH_NUM_HASH_RND; i++) neigh_get_hash_rnd(&ret->hash_rnd[i]); return ret; } static void neigh_hash_free_rcu(struct rcu_head *head) { struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) { kfree(buckets); } else { kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); } kfree(nht); } static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, unsigned long new_shift) { unsigned int i, hash; struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); old_nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); new_nht = neigh_hash_alloc(new_shift); if (!new_nht) return old_nht; for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; for (n = rcu_dereference_protected(old_nht->hash_buckets[i], lockdep_is_held(&tbl->lock)); n != NULL; n = next) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash >>= (32 - new_nht->hash_shift); next = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); rcu_assign_pointer(n->next, rcu_dereference_protected( new_nht->hash_buckets[hash], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(new_nht->hash_buckets[hash], n); } } rcu_assign_pointer(tbl->nht, new_nht); call_rcu(&old_nht->rcu, neigh_hash_free_rcu); return new_nht; } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock_bh(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); } rcu_read_unlock_bh(); return n; } EXPORT_SYMBOL(neigh_lookup); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) { struct neighbour *n; unsigned int key_len = tbl->key_len; u32 hash_val; struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { if (!memcmp(n->primary_key, pkey, key_len) && net_eq(dev_net(n->dev), net)) { if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; } } rcu_read_unlock_bh(); return n; } EXPORT_SYMBOL(neigh_lookup_nodev); static struct neighbour *___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool exempt_from_gc, bool want_ref) { struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); u32 hash_val; unsigned int key_len = tbl->key_len; int error; struct neigh_hash_table *nht; trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; } memcpy(n->primary_key, pkey, key_len); n->dev = dev; dev_hold(dev); /* Protocol specific setup. */ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { rc = ERR_PTR(error); goto out_neigh_release; } if (dev->netdev_ops->ndo_neigh_construct) { error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; } } /* Device specific setup. */ if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) { rc = ERR_PTR(error); goto out_neigh_release; } n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); goto out_tbl_unlock; } for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock)); n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } n->dead = 0; if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); if (want_ref) neigh_hold(n); rcu_assign_pointer(n->next, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; out_tbl_unlock: write_unlock_bh(&tbl->lock); out_neigh_release: if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); neigh_release(n); goto out; } struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { return ___neigh_create(tbl, pkey, dev, false, want_ref); } EXPORT_SYMBOL(__neigh_create); static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); hash_val ^= hash_val >> 8; hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; return hash_val; } static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct net *net, const void *pkey, unsigned int key_len, struct net_device *dev) { while (n) { if (!memcmp(n->key, pkey, key_len) && net_eq(pneigh_net(n), net) && (n->dev == dev || !n->dev)) return n; n = n->next; } return NULL; } struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], net, pkey, key_len, dev); } EXPORT_SYMBOL_GPL(__pneigh_lookup); struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, int creat) { struct pneigh_entry *n; unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], net, pkey, key_len, dev); read_unlock_bh(&tbl->lock); if (n || !creat) goto out; ASSERT_RTNL(); n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { if (dev) dev_put(dev); kfree(n); n = NULL; goto out; } write_lock_bh(&tbl->lock); n->next = tbl->phash_buckets[hash_val]; tbl->phash_buckets[hash_val] = n; write_unlock_bh(&tbl->lock); out: return n; } EXPORT_SYMBOL(pneigh_lookup); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev && net_eq(pneigh_net(n), net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); kfree(n); return 0; } } write_unlock_bh(&tbl->lock); return -ENOENT; } static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, struct net_device *dev) { struct pneigh_entry *n, **np, *freelist = NULL; u32 h; for (h = 0; h <= PNEIGH_HASHMASK; h++) { np = &tbl->phash_buckets[h]; while ((n = *np) != NULL) { if (!dev || n->dev == dev) { *np = n->next; n->next = freelist; freelist = n; continue; } np = &n->next; } } write_unlock_bh(&tbl->lock); while ((n = freelist)) { freelist = n->next; n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); kfree(n); } return -ENOENT; } static void neigh_parms_destroy(struct neigh_parms *parms); static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) neigh_parms_destroy(parms); } /* * neighbour must already be out of the table; * */ void neigh_destroy(struct neighbour *neigh) { struct net_device *dev = neigh->dev; NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { pr_warn("Destroying alive neighbour %p\n", neigh); dump_stack(); return; } if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); write_lock_bh(&neigh->lock); __skb_queue_purge(&neigh->arp_queue); write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); dev_put(dev); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); kfree_rcu(neigh, rcu); } EXPORT_SYMBOL(neigh_destroy); /* Neighbour state is suspicious; disable fast path. Called with write_locked neigh. */ static void neigh_suspect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->output = neigh->ops->output; } /* Neighbour state is OK; enable fast path. Called with write_locked neigh. */ static void neigh_connect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is connected\n", neigh); neigh->output = neigh->ops->connected_output; } static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neighbour *n; struct neighbour __rcu **np; unsigned int i; struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); /* * periodically recompute ReachableTime from random function */ if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; tbl->last_rand = jiffies; list_for_each_entry(p, &tbl->parms_list, list) p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } if (atomic_read(&tbl->entries) < tbl->gc_thresh1) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { unsigned int state; write_lock(&n->lock); state = n->nud_state; if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); goto next_elt; } if (time_before(n->used, n->confirmed)) n->used = n->confirmed; if (refcount_read(&n->refcnt) == 1 && (state == NUD_FAILED || time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { *np = n->next; neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); next_elt: np = &n->next; } /* * It's fine to release lock here, even if hash table * grows while we are preempted. */ write_unlock_bh(&tbl->lock); cond_resched(); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } out: /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 * BASE_REACHABLE_TIME. */ queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); write_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : NEIGH_VAR(p, MCAST_PROBES)); } static void neigh_invalidate(struct neighbour *neigh) __releases(neigh->lock) __acquires(neigh->lock) { struct sk_buff *skb; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); neigh_dbg(2, "neigh %p is failed\n", neigh); neigh->updated = jiffies; /* It is very thin place. report_unreachable is very complicated routine. Particularly, it can hit the same neighbour entry! So that, we try to be accurate and avoid dead loop. --ANK */ while (neigh->nud_state == NUD_FAILED && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { write_unlock(&neigh->lock); neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } static void neigh_probe(struct neighbour *neigh) __releases(neigh->lock) { struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); /* keep skb alive even if arp_queue overflows */ if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); if (neigh->ops->solicit) neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); consume_skb(skb); } /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(struct timer_list *t) { unsigned long now, next; struct neighbour *neigh = from_timer(neigh, t, timer); unsigned int state; int notify = 0; write_lock(&neigh->lock); state = neigh->nud_state; now = jiffies; next = now + HZ; if (!(state & NUD_IN_TIMER)) goto out; if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100); } } else { /* NUD_PROBE|NUD_INCOMPLETE */ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100); } if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { neigh->nud_state = NUD_FAILED; notify = 1; neigh_invalidate(neigh); goto out; } if (neigh->nud_state & NUD_IN_TIMER) { if (time_before(next, jiffies + HZ/100)) next = jiffies + HZ/100; if (!mod_timer(&neigh->timer, next)) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); } if (notify) neigh_update_notify(neigh, 0); trace_neigh_timer_handler(neigh, 0); neigh_release(neigh); } int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; bool immediate_probe = false; write_lock_bh(&neigh->lock); rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; if (neigh->dead) goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + NEIGH_VAR(neigh->parms, APP_PROBES)) { unsigned long next, now = jiffies; atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100); neigh_add_timer(neigh, next); immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; write_unlock_bh(&neigh->lock); kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); } if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { while (neigh->arp_queue_len_bytes + skb->truesize > NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { struct sk_buff *buff; buff = __skb_dequeue(&neigh->arp_queue); if (!buff) break; neigh->arp_queue_len_bytes -= buff->truesize; kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); neigh->arp_queue_len_bytes += skb->truesize; } rc = 1; } out_unlock_bh: if (immediate_probe) neigh_probe(neigh); else write_unlock(&neigh->lock); local_bh_enable(); trace_neigh_event_send_done(neigh, rc); return rc; out_dead: if (neigh->nud_state & NUD_STALE) goto out_unlock_bh; write_unlock_bh(&neigh->lock); kfree_skb(skb); trace_neigh_event_send_dead(neigh, 1); return 1; } EXPORT_SYMBOL(__neigh_event_send); static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) = NULL; if (neigh->dev->header_ops) update = neigh->dev->header_ops->cache_update; if (update) { hh = &neigh->hh; if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); } } } /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. -- flags NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, if it is different. NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as a router. Caller MUST hold reference count on the entry. */ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { bool ext_learn_change = false; u8 old; int err; int notify = 0; struct net_device *dev; int update_isrouter = 0; trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); write_lock_bh(&neigh->lock); dev = neigh->dev; old = neigh->nud_state; err = -EPERM; if (neigh->dead) { NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); new = old; goto out; } if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify); if (!(new & NUD_VALID)) { neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); neigh->nud_state = new; err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; } goto out; } /* Compare new lladdr with cached one */ if (!dev->addr_len) { /* First case: device needs no address. */ lladdr = neigh->ha; } else if (lladdr) { /* The second case: if something is already cached and a new address is proposed: - compare new & old - if they are different, check override flag */ if ((old & NUD_VALID) && !memcmp(lladdr, neigh->ha, dev->addr_len)) lladdr = neigh->ha; } else { /* No address is supplied; if we know something, use it, otherwise discard the request. */ err = -EINVAL; if (!(old & NUD_VALID)) { NL_SET_ERR_MSG(extack, "No link layer address given"); goto out; } lladdr = neigh->ha; } /* Update confirmed timestamp for neighbour entry after we * received ARP packet even if it doesn't change IP to MAC binding. */ if (new & NUD_CONNECTED) neigh->confirmed = jiffies; /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ err = 0; update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; if (old & NUD_VALID) { if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { update_isrouter = 0; if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && (old & NUD_CONNECTED)) { lladdr = neigh->ha; new = NUD_STALE; } else goto out; } else { if (lladdr == neigh->ha && new == NUD_STALE && !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } /* Update timestamp only once we know we will make a change to the * neighbour entry. Otherwise we risk to move the locktime window with * noop updates and ignore relevant ARP updates. */ if (new != old || lladdr != neigh->ha) neigh->updated = jiffies; if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) atomic_set(&neigh->probes, 0); if (new & NUD_IN_TIMER) neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); neigh->nud_state = new; notify = 1; } if (lladdr != neigh->ha) { write_seqlock(&neigh->ha_lock); memcpy(&neigh->ha, lladdr, dev->addr_len); write_sequnlock(&neigh->ha_lock); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); notify = 1; } if (new == old) goto out; if (new & NUD_CONNECTED) neigh_connect(neigh); else neigh_suspect(neigh); if (!(old & NUD_VALID)) { struct sk_buff *skb; /* Again: avoid dead loop if something went wrong */ while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { struct dst_entry *dst = skb_dst(skb); struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); rcu_read_lock(); /* Why not just use 'neigh' as-is? The problem is that * things such as shaper, eql, and sch_teql can end up * using alternative, different, neigh objects to output * the packet in the output path. So what we need to do * here is re-lookup the top-level neigh in the path so * we can reinject the packet there. */ n2 = NULL; if (dst && dst->obsolete != DST_OBSOLETE_DEAD) { n2 = dst_neigh_lookup_skb(dst, skb); if (n2) n1 = n2; } n1->output(n1, skb); if (n2) neigh_release(n2); rcu_read_unlock(); write_lock_bh(&neigh->lock); } __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: if (update_isrouter) neigh_update_is_router(neigh, flags, &notify); write_unlock_bh(&neigh->lock); if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) neigh_update_gc_list(neigh); if (notify) neigh_update_notify(neigh, nlmsg_pid); trace_neigh_update_done(neigh, err); return err; } int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid) { return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL); } EXPORT_SYMBOL(neigh_update); /* Update the neigh to listen temporarily for probe responses, even if it is * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. */ void __neigh_set_probe_once(struct neighbour *neigh) { if (neigh->dead) return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; neigh->nud_state = NUD_INCOMPLETE; atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100)); } EXPORT_SYMBOL(__neigh_set_probe_once); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev) { struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); if (neigh) neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0); return neigh; } EXPORT_SYMBOL(neigh_event_ns); /* called with read_lock_bh(&n->lock); */ static void neigh_hh_init(struct neighbour *n) { struct net_device *dev = n->dev; __be16 prot = n->tbl->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); /* Only one thread can come in here and initialize the * hh_cache entry. */ if (!hh->hh_len) dev->header_ops->cache(n, hh, prot); write_unlock_bh(&n->lock); } /* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { int rc = 0; if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) rc = dev_queue_xmit(skb); else goto out_kfree_skb; } out: return rc; out_kfree_skb: rc = -EINVAL; kfree_skb(skb); goto out; } EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = neigh->dev; unsigned int seq; int err; do { __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) err = dev_queue_xmit(skb); else { err = -EINVAL; kfree_skb(skb); } return err; } EXPORT_SYMBOL(neigh_connected_output); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) { return dev_queue_xmit(skb); } EXPORT_SYMBOL(neigh_direct_output); static void neigh_proxy_process(struct timer_list *t) { struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); long sched_next = 0; unsigned long now = jiffies; struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { long tdif = NEIGH_CB(skb)->sched_next - now; if (tdif <= 0) { struct net_device *dev = skb->dev; __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { rcu_read_lock(); tbl->proxy_redo(skb); rcu_read_unlock(); } else { kfree_skb(skb); } dev_put(dev); } else if (!sched_next || tdif < sched_next) sched_next = tdif; } del_timer(&tbl->proxy_timer); if (sched_next) mod_timer(&tbl->proxy_timer, jiffies + sched_next); spin_unlock(&tbl->proxy_queue.lock); } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; unsigned long sched_next = now + (prandom_u32() % NEIGH_VAR(p, PROXY_DELAY)); if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } NEIGH_CB(skb)->sched_next = sched_next; NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } EXPORT_SYMBOL(pneigh_enqueue); static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct net *net, int ifindex) { struct neigh_parms *p; list_for_each_entry(p, &tbl->parms_list, list) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } return NULL; } struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p; struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { dev_put(dev); kfree(p); return NULL; } write_lock_bh(&tbl->lock); list_add(&p->list, &tbl->parms.list); write_unlock_bh(&tbl->lock); neigh_parms_data_state_cleanall(p); } return p; } EXPORT_SYMBOL(neigh_parms_alloc); static void neigh_rcu_free_parms(struct rcu_head *head) { struct neigh_parms *parms = container_of(head, struct neigh_parms, rcu_head); neigh_parms_put(parms); } void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { if (!parms || parms == &tbl->parms) return; write_lock_bh(&tbl->lock); list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); if (parms->dev) dev_put(parms->dev); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { kfree(parms); } static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; void neigh_table_init(int index, struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; INIT_LIST_HEAD(&tbl->parms_list); INIT_LIST_HEAD(&tbl->gc_list); list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat, &neigh_stat_seq_ops, tbl)) panic("cannot create neighbour proc dir entry"); #endif RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); if (!tbl->entry_size) tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + tbl->key_len, NEIGH_PRIV_ALIGN); else WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; neigh_tables[index] = tbl; } EXPORT_SYMBOL(neigh_table_init); int neigh_table_clear(int index, struct neigh_table *tbl) { neigh_tables[index] = NULL; /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; remove_proc_entry(tbl->id, init_net.proc_net_stat); free_percpu(tbl->stats); tbl->stats = NULL; return 0; } EXPORT_SYMBOL(neigh_table_clear); static struct neigh_table *neigh_find_table(int family) { struct neigh_table *tbl = NULL; switch (family) { case AF_INET: tbl = neigh_tables[NEIGH_ARP_TABLE]; break; case AF_INET6: tbl = neigh_tables[NEIGH_ND_TABLE]; break; case AF_DECnet: tbl = neigh_tables[NEIGH_DN_TABLE]; break; } return tbl; } const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID }, [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, [NDA_PROBES] = { .type = NLA_U32 }, [NDA_VLAN] = { .type = NLA_U16 }, [NDA_PORT] = { .type = NLA_U16 }, [NDA_VNI] = { .type = NLA_U32 }, [NDA_IFINDEX] = { .type = NLA_U32 }, [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; struct neighbour *neigh; struct net_device *dev = NULL; int err = -EINVAL; ASSERT_RTNL(); if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); if (!dst_attr) { NL_SET_ERR_MSG(extack, "Network address not specified"); goto out; } ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; } } tbl = neigh_find_table(ndm->ndm_family); if (tbl == NULL) return -EAFNOSUPPORT; if (nla_len(dst_attr) < (int)tbl->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address"); goto out; } if (ndm->ndm_flags & NTF_PROXY) { err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); goto out; } if (dev == NULL) goto out; neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); if (neigh == NULL) { err = -ENOENT; goto out; } err = __neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, NETLINK_CB(skb).portid, extack); write_lock_bh(&tbl->lock); neigh_release(neigh); neigh_remove_one(neigh, tbl); write_unlock_bh(&tbl->lock); out: return err; } static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_OVERRIDE_ISROUTER; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; struct neighbour *neigh; void *dst, *lladdr; u8 protocol = 0; int err; ASSERT_RTNL(); err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack); if (err < 0) goto out; err = -EINVAL; if (!tb[NDA_DST]) { NL_SET_ERR_MSG(extack, "Network address not specified"); goto out; } ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; } if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) { NL_SET_ERR_MSG(extack, "Invalid link address"); goto out; } } tbl = neigh_find_table(ndm->ndm_family); if (tbl == NULL) return -EAFNOSUPPORT; if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address"); goto out; } dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (tb[NDA_PROTOCOL]) protocol = nla_get_u8(tb[NDA_PROTOCOL]); if (ndm->ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm->ndm_flags; if (protocol) pn->protocol = protocol; err = 0; } goto out; } if (!dev) { NL_SET_ERR_MSG(extack, "Device not specified"); goto out; } if (tbl->allow_add && !tbl->allow_add(dev, extack)) { err = -EINVAL; goto out; } neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { bool exempt_from_gc; if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm->ndm_flags & NTF_EXT_LEARNED; neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; } } else { if (nlh->nlmsg_flags & NLM_F_EXCL) { err = -EEXIST; neigh_release(neigh); goto out; } if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) flags &= ~(NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_OVERRIDE_ISROUTER); } if (protocol) neigh->protocol = protocol; if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; if (ndm->ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; if (ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; } else err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); neigh_release(neigh); out: return err; } static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, NDTA_PARMS); if (nest == NULL) return -ENOBUFS; if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || nla_put_u32(skb, NDTPA_UCAST_PROBES, NEIGH_VAR(parms, UCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_PROBES, NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_REPROBES, NEIGH_VAR(parms, MCAST_REPROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, NDTPA_PAD) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_GC_STALETIME, NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_RETRANS_TIME, NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_PROXY_DELAY, NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, u32 pid, u32 seq, int type, int flags) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) goto nla_put_failure; { unsigned long now = jiffies; long flush_delta = now - tbl->last_flush; long rand_delta = now - tbl->last_rand; struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, .ndtc_entry_size = tbl->entry_size, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); rcu_read_unlock_bh(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; } { int cpu; struct ndt_stats ndst; memset(&ndst, 0, sizeof(ndst)); for_each_possible_cpu(cpu) { struct neigh_statistics *st; st = per_cpu_ptr(tbl->stats, cpu); ndst.ndts_allocs += st->allocs; ndst.ndts_destroys += st->destroys; ndst.ndts_hash_grows += st->hash_grows; ndst.ndts_res_failed += st->res_failed; ndst.ndts_lookups += st->lookups; ndst.ndts_hits += st->hits; ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; ndst.ndts_forced_gc_runs += st->forced_gc_runs; ndst.ndts_table_fulls += st->table_fulls; } if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, NDTA_PAD)) goto nla_put_failure; } BUG_ON(tbl->parms.dev); if (neightbl_fill_parms(skb, &tbl->parms) < 0) goto nla_put_failure; read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; nla_put_failure: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int neightbl_fill_param_info(struct sk_buff *skb, struct neigh_table *tbl, struct neigh_parms *parms, u32 pid, u32 seq, int type, unsigned int flags) { struct ndtmsg *ndtmsg; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || neightbl_fill_parms(skb, parms) < 0) goto errout; read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, [NDTA_THRESH2] = { .type = NLA_U32 }, [NDTA_THRESH3] = { .type = NLA_U32 }, [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, [NDTA_PARMS] = { .type = NLA_NESTED }, }; static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; bool found = false; int err, tidx; err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy, extack); if (err < 0) goto errout; if (tb[NDTA_NAME] == NULL) { err = -EINVAL; goto errout; } ndtmsg = nlmsg_data(nlh); for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { tbl = neigh_tables[tidx]; if (!tbl) continue; if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { found = true; break; } } if (!found) return -ENOENT; /* * We acquire tbl->lock to be nice to the periodic timers and * make sure they always see a consistent set of values. */ write_lock_bh(&tbl->lock); if (tb[NDTA_PARMS]) { struct nlattr *tbp[NDTPA_MAX+1]; struct neigh_parms *p; int i, ifindex = 0; err = nla_parse_nested_deprecated(tbp, NDTPA_MAX, tb[NDTA_PARMS], nl_ntbl_parm_policy, extack); if (err < 0) goto errout_tbl_lock; if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); p = lookup_neigh_parms(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; } for (i = 1; i <= NDTPA_MAX; i++) { if (tbp[i] == NULL) continue; switch (i) { case NDTPA_QUEUE_LEN: NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, nla_get_u32(tbp[i]) * SKB_TRUESIZE(ETH_FRAME_LEN)); break; case NDTPA_QUEUE_LENBYTES: NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, nla_get_u32(tbp[i])); break; case NDTPA_PROXY_QLEN: NEIGH_VAR_SET(p, PROXY_QLEN, nla_get_u32(tbp[i])); break; case NDTPA_APP_PROBES: NEIGH_VAR_SET(p, APP_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_UCAST_PROBES: NEIGH_VAR_SET(p, UCAST_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_MCAST_PROBES: NEIGH_VAR_SET(p, MCAST_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_MCAST_REPROBES: NEIGH_VAR_SET(p, MCAST_REPROBES, nla_get_u32(tbp[i])); break; case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); /* update reachable_time as well, otherwise, the change will * only be effective after the next time neigh_periodic_work * decides to recompute it (can be multiple minutes) */ p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); break; case NDTPA_GC_STALETIME: NEIGH_VAR_SET(p, GC_STALETIME, nla_get_msecs(tbp[i])); break; case NDTPA_DELAY_PROBE_TIME: NEIGH_VAR_SET(p, DELAY_PROBE_TIME, nla_get_msecs(tbp[i])); call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, nla_get_msecs(tbp[i])); break; case NDTPA_ANYCAST_DELAY: NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_PROXY_DELAY: NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_LOCKTIME: NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i])); break; } } } err = -ENOENT; if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && !net_eq(net, &init_net)) goto errout_tbl_lock; if (tb[NDTA_THRESH1]) tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); if (tb[NDTA_THRESH2]) tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); if (tb[NDTA_THRESH3]) tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); if (tb[NDTA_GC_INTERVAL]) tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); err = 0; errout_tbl_lock: write_unlock_bh(&tbl->lock); errout: return err; } static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct ndtmsg *ndtm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); return -EINVAL; } ndtm = nlmsg_data(nlh); if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ndtm))) { NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request"); return -EINVAL; } return 0; } static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; struct neigh_table *tbl; if (cb->strict_check) { int err = neightbl_valid_dump_info(nlh, cb->extack); if (err < 0) return err; } family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; tbl = neigh_tables[tidx]; if (!tbl) continue; if (tidx < tbl_skip || (family && tbl->family != family)) continue; if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) < 0) break; nidx = 0; p = list_next_entry(&tbl->parms, list); list_for_each_entry_from(p, &tbl->parms_list, list) { if (!net_eq(neigh_parms_net(p), net)) continue; if (nidx < neigh_skip) goto next; if (neightbl_fill_param_info(skb, tbl, p, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) < 0) goto out; next: nidx++; } neigh_skip = 0; } out: cb->args[0] = tidx; cb->args[1] = nidx; return skb->len; } static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, u32 pid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = neigh->flags; ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) goto nla_put_failure; read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; if (neigh->nud_state & NUD_VALID) { char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, neigh, neigh->dev); if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { read_unlock_bh(&neigh->lock); goto nla_put_failure; } } ci.ndm_used = jiffies_to_clock_t(now - neigh->used); ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, u32 pid, u32 seq, int type, unsigned int flags, struct neigh_table *tbl) { struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = tbl->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) { call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); } static bool neigh_master_filtered(struct net_device *dev, int master_idx) { struct net_device *master; if (!master_idx) return false; master = dev ? netdev_master_upper_dev_get(dev) : NULL; if (!master || master->ifindex != master_idx) return true; return false; } static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) { if (filter_idx && (!dev || dev->ifindex != filter_idx)) return true; return false; } struct neigh_dump_filter { int master_idx; int dev_idx; }; static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb, struct neigh_dump_filter *filter) { struct net *net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; unsigned int flags = NLM_F_MULTI; if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; n != NULL; n = rcu_dereference_bh(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, flags) < 0) { rc = -1; goto out; } next: idx++; } } rc = skb->len; out: rcu_read_unlock_bh(); cb->args[1] = h; cb->args[2] = idx; return rc; } static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb, struct neigh_dump_filter *filter) { struct pneigh_entry *n; struct net *net = sock_net(skb->sk); int rc, h, s_h = cb->args[3]; int idx, s_idx = idx = cb->args[4]; unsigned int flags = NLM_F_MULTI; if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; read_lock_bh(&tbl->lock); for (h = s_h; h <= PNEIGH_HASHMASK; h++) { if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { if (idx < s_idx || pneigh_net(n) != net) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, flags, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; } next: idx++; } } read_unlock_bh(&tbl->lock); rc = skb->len; out: cb->args[3] = h; cb->args[4] = idx; return rc; } static int neigh_valid_dump_req(const struct nlmsghdr *nlh, bool strict_check, struct neigh_dump_filter *filter, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; int err, i; if (strict_check) { struct ndmsg *ndm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); return -EINVAL; } if (ndm->ndm_flags & ~NTF_PROXY) { NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); } else { err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); } if (err < 0) return err; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; /* all new attributes should require strict_check */ switch (i) { case NDA_IFINDEX: filter->dev_idx = nla_get_u32(tb[i]); break; case NDA_MASTER: filter->master_idx = nla_get_u32(tb[i]); break; default: if (strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); return -EINVAL; } } } return 0; } static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct neigh_dump_filter filter = {}; struct neigh_table *tbl; int t, family, s_t; int proxy = 0; int err; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; /* check for full ndmsg structure presence, family member is * the same for both structures */ if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) proxy = 1; err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); if (err < 0 && cb->strict_check) return err; s_t = cb->args[0]; for (t = 0; t < NEIGH_NR_TABLES; t++) { tbl = neigh_tables[t]; if (!tbl) continue; if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (proxy) err = pneigh_dump_table(tbl, skb, cb, &filter); else err = neigh_dump_table(tbl, skb, cb, &filter); if (err < 0) break; } cb->args[0] = t; return skb->len; } static int neigh_valid_get_req(const struct nlmsghdr *nlh, struct neigh_table **tbl, void **dst, int *dev_idx, u8 *ndm_flags, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); return -EINVAL; } if (ndm->ndm_flags & ~NTF_PROXY) { NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); if (err < 0) return err; *ndm_flags = ndm->ndm_flags; *dev_idx = ndm->ndm_ifindex; *tbl = neigh_find_table(ndm->ndm_family); if (*tbl == NULL) { NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request"); return -EAFNOSUPPORT; } for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_DST: if (nla_len(tb[i]) != (int)(*tbl)->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request"); return -EINVAL; } *dst = nla_data(tb[i]); break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request"); return -EINVAL; } } return 0; } static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(4) /* NDA_PROBES */ + nla_total_size(1); /* NDA_PROTOCOL */ } static int neigh_get_reply(struct net *net, struct neighbour *neigh, u32 pid, u32 seq) { struct sk_buff *skb; int err = 0; skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL); if (!skb) return -ENOBUFS; err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0); if (err) { kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, pid); errout: return err; } static inline size_t pneigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + nla_total_size(1); /* NDA_PROTOCOL */ } static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh, u32 pid, u32 seq, struct neigh_table *tbl) { struct sk_buff *skb; int err = 0; skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL); if (!skb) return -ENOBUFS; err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl); if (err) { kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, pid); errout: return err; } static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct net_device *dev = NULL; struct neigh_table *tbl = NULL; struct neighbour *neigh; void *dst = NULL; u8 ndm_flags = 0; int dev_idx = 0; int err; err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags, extack); if (err < 0) return err; if (dev_idx) { dev = __dev_get_by_index(net, dev_idx); if (!dev) { NL_SET_ERR_MSG(extack, "Unknown device ifindex"); return -ENODEV; } } if (!dst) { NL_SET_ERR_MSG(extack, "Network address not specified"); return -EINVAL; } if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; pn = pneigh_lookup(tbl, net, dst, dev, 0); if (!pn) { NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found"); return -ENOENT; } return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, tbl); } if (!dev) { NL_SET_ERR_MSG(extack, "No device specified"); return -EINVAL; } neigh = neigh_lookup(tbl, dst, dev); if (!neigh) { NL_SET_ERR_MSG(extack, "Neighbour entry not found"); return -ENOENT; } err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); neigh_release(neigh); return err; } void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) { int chain; struct neigh_hash_table *nht; rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); read_lock(&tbl->lock); /* avoid resizes */ for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; for (n = rcu_dereference_bh(nht->hash_buckets[chain]); n != NULL; n = rcu_dereference_bh(n->next)) cb(n, cookie); } read_unlock(&tbl->lock); rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_for_each); /* The tbl->lock must be held as a writer and BH disabled. */ void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { int chain; struct neigh_hash_table *nht; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; struct neighbour __rcu **np; np = &nht->hash_buckets[chain]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { int release; write_lock(&n->lock); release = cb(n); if (release) { rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); neigh_mark_dead(n); } else np = &n->next; write_unlock(&n->lock); if (release) neigh_cleanup_and_release(n); } } } EXPORT_SYMBOL(__neigh_for_each_release); int neigh_xmit(int index, struct net_device *dev, const void *addr, struct sk_buff *skb) { int err = -EAFNOSUPPORT; if (likely(index < NEIGH_NR_TABLES)) { struct neigh_table *tbl; struct neighbour *neigh; tbl = neigh_tables[index]; if (!tbl) goto out; rcu_read_lock_bh(); if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); neigh = __ipv4_neigh_lookup_noref(dev, key); } else { neigh = __neigh_lookup_noref(tbl, addr, dev); } if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); if (IS_ERR(neigh)) { rcu_read_unlock_bh(); goto out_kfree_skb; } err = neigh->output(neigh, skb); rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), addr, NULL, skb->len); if (err < 0) goto out_kfree_skb; err = dev_queue_xmit(skb); } out: return err; out_kfree_skb: kfree_skb(skb); goto out; } EXPORT_SYMBOL(neigh_xmit); #ifdef CONFIG_PROC_FS static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; int bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { n = rcu_dereference_bh(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; v = state->neigh_sub_iter(state, n, &fakep); if (!v) goto next; } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; if (n->nud_state & ~NUD_NOARP) break; next: n = rcu_dereference_bh(n->next); } if (n) break; } state->bucket = bucket; return n; } static struct neighbour *neigh_get_next(struct seq_file *seq, struct neighbour *n, loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) return n; } n = rcu_dereference_bh(n->next); while (1) { while (n) { if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) return n; goto next; } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; if (n->nud_state & ~NUD_NOARP) break; next: n = rcu_dereference_bh(n->next); } if (n) break; if (++state->bucket >= (1 << nht->hash_shift)) break; n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); } if (n && pos) --(*pos); return n; } static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) { struct neighbour *n = neigh_get_first(seq); if (n) { --(*pos); while (*pos) { n = neigh_get_next(seq, n, pos); if (!n) break; } } return *pos ? NULL : n; } static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; } state->bucket = bucket; return pn; } static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, struct pneigh_entry *pn, loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; do { pn = pn->next; } while (pn && !net_eq(pneigh_net(pn), net)); while (!pn) { if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; } if (pn && pos) --(*pos); return pn; } static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) { struct pneigh_entry *pn = pneigh_get_first(seq); if (pn) { --(*pos); while (*pos) { pn = pneigh_get_next(seq, pn, pos); if (!pn) break; } } return *pos ? NULL : pn; } static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; void *rc; loff_t idxpos = *pos; rc = neigh_get_idx(seq, &idxpos); if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) rc = pneigh_get_idx(seq, &idxpos); return rc; } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) __acquires(rcu_bh) { struct neigh_seq_state *state = seq->private; state->tbl = tbl; state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); rcu_read_lock_bh(); state->nht = rcu_dereference_bh(tbl->nht); read_lock(&tbl->lock); return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct neigh_seq_state *state; void *rc; if (v == SEQ_START_TOKEN) { rc = neigh_get_first(seq); goto out; } state = seq->private; if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { rc = neigh_get_next(seq, v, NULL); if (rc) goto out; if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) rc = pneigh_get_first(seq); } else { BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); rc = pneigh_get_next(seq, v, NULL); } out: ++(*pos); return rc; } EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) __releases(rcu_bh) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; read_unlock(&tbl->lock); rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_seq_stop); /* statistics via seq_file */ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } return NULL; } static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } (*pos)++; return NULL; } static void neigh_stat_seq_stop(struct seq_file *seq, void *v) { } static int neigh_stat_seq_show(struct seq_file *seq, void *v) { struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " "%08lx %08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, st->destroys, st->hash_grows, st->lookups, st->hits, st->res_failed, st->rcv_probes_mcast, st->rcv_probes_ucast, st->periodic_gc_runs, st->forced_gc_runs, st->unres_discards, st->table_fulls ); return 0; } static const struct seq_operations neigh_stat_seq_ops = { .start = neigh_stat_seq_start, .next = neigh_stat_seq_next, .stop = neigh_stat_seq_stop, .show = neigh_stat_seq_show, }; #endif /* CONFIG_PROC_FS */ static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid) { struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = neigh_fill_info(skb, n, pid, 0, type, flags); if (err < 0) { /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); } EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int size, ret; struct ctl_table tmp = *ctl; tmp.extra1 = SYSCTL_ZERO; tmp.extra2 = &unres_qlen_max; tmp.data = &size; size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); return ret; } static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, int family) { switch (family) { case AF_INET: return __in_dev_arp_parms_get_rcu(dev); case AF_INET6: return __in6_dev_nd_parms_get_rcu(dev); } return NULL; } static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { struct net_device *dev; int family = neigh_parms_family(p); rcu_read_lock(); for_each_netdev_rcu(net, dev) { struct neigh_parms *dst_p = neigh_get_dev_parms_rcu(dev, family); if (dst_p && !test_bit(index, dst_p->data_state)) dst_p->data[index] = p->data[index]; } rcu_read_unlock(); } static void neigh_proc_update(struct ctl_table *ctl, int write) { struct net_device *dev = ctl->extra1; struct neigh_parms *p = ctl->extra2; struct net *net = neigh_parms_net(p); int index = (int *) ctl->data - p->data; if (!write) return; set_bit(index, p->data_state); if (index == NEIGH_VAR_DELAY_PROBE_TIME) call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tmp = *ctl; int ret; tmp.extra1 = SYSCTL_ZERO; tmp.extra2 = SYSCTL_INT_MAX; ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec); int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct neigh_parms *p = ctl->extra2; int ret; if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0) { /* update reachable_time as well, otherwise, the change will * only be effective after the next time neigh_periodic_work * decides to recompute it */ p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } return ret; } #define NEIGH_PARMS_DATA_OFFSET(index) \ (&((struct neigh_parms *) 0)->data[index]) #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ [NEIGH_VAR_ ## attr] = { \ .procname = name, \ .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ } #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NEIGH_VAR_GC_THRESH1] = { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, {}, }, }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *handler) { int i; struct neigh_sysctl_table *t; const char *dev_name_source; char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; char *p_name; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) goto err; for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { t->neigh_vars[i].data += (long) p; t->neigh_vars[i].extra1 = dev; t->neigh_vars[i].extra2 = p; } if (dev) { dev_name_source = dev->name; /* Terminate the table early */ memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { struct neigh_table *tbl = p->tbl; dev_name_source = "default"; t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { /* RetransTime */ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; /* RetransTime (in milliseconds)*/ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; } else { /* Those handlers will update p->reachable_time after * base_reachable_time(_ms) is set to ensure the new timer starts being * applied after the next neighbour update instead of waiting for * neigh_periodic_work to update its value (can be multiple minutes) * So any handler that replaces them should do this as well */ /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = neigh_proc_base_reachable_time; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = neigh_proc_base_reachable_time; } /* Don't export sysctls to unprivileged users */ if (neigh_parms_net(p)->user_ns != &init_user_ns) t->neigh_vars[0].procname = NULL; switch (neigh_parms_family(p)) { case AF_INET: p_name = "ipv4"; break; case AF_INET6: p_name = "ipv6"; break; default: BUG(); } snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); if (!t->sysctl_header) goto free; p->sysctl_table = t; return 0; free: kfree(t); err: return -ENOBUFS; } EXPORT_SYMBOL(neigh_sysctl_register); void neigh_sysctl_unregister(struct neigh_parms *p) { if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; unregister_net_sysctl_table(t->sysctl_header); kfree(t); } } EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ static int __init neigh_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); return 0; } subsys_initcall(neigh_init);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/minmax.h> #include <linux/sched.h> #include <linux/thread_info.h> #include <asm/uaccess.h> #ifdef CONFIG_SET_FS /* * Force the uaccess routines to be wired up for actual userspace access, * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone * using force_uaccess_end below. */ static inline mm_segment_t force_uaccess_begin(void) { mm_segment_t fs = get_fs(); set_fs(USER_DS); return fs; } static inline void force_uaccess_end(mm_segment_t oldfs) { set_fs(oldfs); } #else /* CONFIG_SET_FS */ typedef struct { /* empty dummy */ } mm_segment_t; #ifndef TASK_SIZE_MAX #define TASK_SIZE_MAX TASK_SIZE #endif #define uaccess_kernel() (false) #define user_addr_max() (TASK_SIZE_MAX) static inline mm_segment_t force_uaccess_begin(void) { return (mm_segment_t) { }; } static inline void force_uaccess_end(mm_segment_t oldfs) { } #endif /* CONFIG_SET_FS */ /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and * __copy_{to,from}_user{,_inatomic}(). * * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and * return the amount left to copy. They should assume that access_ok() has * already been checked (and succeeded); they should *not* zero-pad anything. * No KASAN or object size checks either - those belong here. * * Both of these functions should attempt to copy size bytes starting at from * into the area starting at to. They must not fetch or store anything * outside of those areas. Return value must be between 0 (everything * copied successfully) and size (nothing copied). * * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting * at to must become equal to the bytes fetched from the corresponding area * starting at from. All data past to + size - N must be left unmodified. * * If copying succeeds, the return value must be 0. If some data cannot be * fetched, it is permitted to copy less than had been fetched; the only * hard requirement is that not storing anything at all (i.e. returning size) * should happen only when nothing could be copied. In other words, you don't * have to squeeze as much as possible - it is allowed, but not necessary. * * For raw_copy_from_user() to always points to kernel memory and no faults * on store should happen. Interpretation of from is affected by set_fs(). * For raw_copy_to_user() it's the other way round. * * Both can be inlined - it's up to architectures whether it wants to bother * with that. They should not be used directly; they are used to implement * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) * that are used instead. Out of those, __... ones are inlined. Plain * copy_{to,from}_user() might or might not be inlined. If you want them * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. * * NOTE: only copy_from_user() zero-pads the destination in case of short copy. * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything * at all; their callers absolutely must check the return value. * * Biarch ones should also provide raw_copy_in_user() - similar to the above, * but both source and destination are __user pointers (affected by set_fs() * as usual) and both source and destination can trigger faults. */ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } /** * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. * @to: Destination address, in user space. * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * The caller should also make sure he pins the user space address * so that we don't result in page fault and sleep. */ static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } #ifdef INLINE_COPY_FROM_USER static inline __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } #else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); #endif #ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; } #else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); #endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (likely(check_copy_size(to, n, false))) n = _copy_from_user(to, from, n); return n; } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { if (likely(check_copy_size(from, n, true))) n = _copy_to_user(to, from, n); return n; } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); if (access_ok(to, n) && access_ok(from, n)) n = raw_copy_in_user(to, from, n); return n; } #endif #ifndef copy_mc_to_kernel /* * Without arch opt-in this generic copy_mc_to_kernel() will not handle * #MC (or arch equivalent) during source read. */ static inline unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; } #endif static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; } static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; } /* * These routines enable/disable the pagefault handler. If disabled, it will * not take any locks and go straight to the fixup table. * * User access methods will not sleep when called from a pagefault_disabled() * environment. */ static inline void pagefault_disable(void) { pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. */ barrier(); } static inline void pagefault_enable(void) { /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); pagefault_disabled_dec(); } /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ static inline bool pagefault_disabled(void) { return current->pagefault_disabled != 0; } /* * The pagefault handler is in general disabled by pagefault_disable() or * when in irq context (via in_atomic()). * * This function should only be used by the fault handlers. Other users should * stick to pagefault_disabled(). * Please NEVER use preempt_disable() to disable the fault handler. With * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline __must_check unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } #endif /* ARCH_HAS_NOCACHE_UACCESS */ extern __must_check int check_zeroed_user(const void __user *from, size_t size); /** * copy_struct_from_user: copy a struct from userspace * @dst: Destination address, in kernel space. This buffer must be @ksize * bytes long. * @ksize: Size of @dst struct. * @src: Source address, in userspace. * @usize: (Alleged) size of @src struct. * * Copies a struct from userspace to kernel space, in a way that guarantees * backwards-compatibility for struct syscall arguments (as long as future * struct extensions are made such that all new fields are *appended* to the * old struct, and zeroed-out new fields have the same meaning as the old * struct). * * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. * The recommended usage is something like the following: * * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) * { * int err; * struct foo karg = {}; * * if (usize > PAGE_SIZE) * return -E2BIG; * if (usize < FOO_SIZE_VER0) * return -EINVAL; * * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); * if (err) * return err; * * // ... * } * * There are three cases to consider: * * If @usize == @ksize, then it's copied verbatim. * * If @usize < @ksize, then the userspace has passed an old struct to a * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) * are to be zero-filled. * * If @usize > @ksize, then the userspace has passed a new struct to an * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) * are checked to ensure they are zeroed, otherwise -E2BIG is returned. * * Returns (in all cases, some data may have been copied): * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. * * -EFAULT: access to userspace failed. */ static __always_inline __must_check int copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t usize) { size_t size = min(ksize, usize); size_t rest = max(ksize, usize) - size; /* Deal with trailing bytes. */ if (usize < ksize) { memset(dst + size, 0, rest); } else if (usize > ksize) { int ret = check_zeroed_user(src + size, rest); if (ret <= 0) return ret ?: -E2BIG; } /* Copy the interoperable parts of the struct. */ if (copy_from_user(dst, src, size)) return -EFAULT; return 0; } bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); long copy_from_user_nofault(void *dst, const void __user *src, size_t size); long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ #define get_kernel_nofault(val, ptr) ({ \ const typeof(val) *__gk_ptr = (ptr); \ copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ }) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif #ifndef user_write_access_begin #define user_write_access_begin user_access_begin #define user_write_access_end user_access_end #endif #ifndef user_read_access_begin #define user_read_access_begin user_access_begin #define user_read_access_end user_access_end #endif #ifdef CONFIG_HARDENED_USERCOPY void usercopy_warn(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); #endif #endif /* __LINUX_UACCESS_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 /* SPDX-License-Identifier: GPL-2.0 */ /* * Percpu refcounts: * (C) 2012 Google, Inc. * Author: Kent Overstreet <koverstreet@google.com> * * This implements a refcount with similar semantics to atomic_t - atomic_inc(), * atomic_dec_and_test() - but percpu. * * There's one important difference between percpu refs and normal atomic_t * refcounts; you have to keep track of your initial refcount, and then when you * start shutting down you call percpu_ref_kill() _before_ dropping the initial * refcount. * * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less * than an atomic_t - this is because of the way shutdown works, see * percpu_ref_kill()/PERCPU_COUNT_BIAS. * * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() * puts the ref back in single atomic_t mode, collecting the per cpu refs and * issuing the appropriate barriers, and then marks the ref as shutting down so * that percpu_ref_put() will check for the ref hitting 0. After it returns, * it's safe to drop the initial ref. * * USAGE: * * See fs/aio.c for some example usage; it's used there for struct kioctx, which * is created when userspaces calls io_setup(), and destroyed when userspace * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref. * After that, there can't be any new users of the kioctx (from lookup_ioctx()) * and it's then safe to drop the initial ref with percpu_ref_put(). * * Note that the free path, free_ioctx(), needs to go through explicit call_rcu() * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't * imply RCU grace periods of any kind and if a user wants to combine percpu_ref * with RCU protection, it must be done explicitly. * * Code that does a two stage shutdown like this often needs some kind of * explicit synchronization to ensure the initial refcount can only be dropped * once - percpu_ref_kill() does this for you, it returns true once and false if * someone else already called it. The aio code uses it this way, but it's not * necessary if the code has some other mechanism to synchronize teardown. * around. */ #ifndef _LINUX_PERCPU_REFCOUNT_H #define _LINUX_PERCPU_REFCOUNT_H #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/gfp.h> struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); /* flags set in the lower bits of percpu_ref->percpu_count_ptr */ enum { __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, __PERCPU_REF_FLAG_BITS = 2, }; /* @flags for percpu_ref_init() */ enum { /* * Start w/ ref == 1 in atomic mode. Can be switched to percpu * operation using percpu_ref_switch_to_percpu(). If initialized * with this flag, the ref will stay in atomic mode until * percpu_ref_switch_to_percpu() is invoked on it. * Implies ALLOW_REINIT. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, /* * Start dead w/ ref == 0 in atomic mode. Must be revived with * percpu_ref_reinit() before used. Implies INIT_ATOMIC and * ALLOW_REINIT. */ PERCPU_REF_INIT_DEAD = 1 << 1, /* * Allow switching from atomic mode to percpu mode. */ PERCPU_REF_ALLOW_REINIT = 1 << 2, }; struct percpu_ref_data { atomic_long_t count; percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; bool allow_reinit:1; struct rcu_head rcu; struct percpu_ref *ref; }; struct percpu_ref { /* * The low bit of the pointer indicates whether the ref is in percpu * mode; if set, then get/put will manipulate the atomic_t. */ unsigned long percpu_count_ptr; /* * 'percpu_ref' is often embedded into user structure, and only * 'percpu_count_ptr' is required in fast path, move other fields * into 'percpu_ref_data', so we can reduce memory footprint in * fast path. */ struct percpu_ref_data *data; }; int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, unsigned int flags, gfp_t gfp); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); void percpu_ref_resurrect(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref); bool percpu_ref_is_zero(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref * @ref: percpu_ref to kill * * Must be used to drop the initial ref on a percpu refcount; must be called * precisely once before shutdown. * * Switches @ref into atomic mode before gathering up the percpu counters * and dropping the initial ref. * * There are no implied RCU grace periods between kill and release. */ static inline void percpu_ref_kill(struct percpu_ref *ref) { percpu_ref_kill_and_confirm(ref, NULL); } /* * Internal helper. Don't use outside percpu-refcount proper. The * function doesn't return the pointer and let the caller test it for NULL * because doing so forces the compiler to generate two conditional * branches as it can't assume that @ref->percpu_count is not NULL. */ static inline bool __ref_is_percpu(struct percpu_ref *ref, unsigned long __percpu **percpu_countp) { unsigned long percpu_ptr; /* * The value of @ref->percpu_count_ptr is tested for * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then * used as a pointer. If the compiler generates a separate fetch * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. * * The dependency ordering from the READ_ONCE() pairs * with smp_store_release() in __percpu_ref_switch_to_percpu(). */ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); /* * Theoretically, the following could test just ATOMIC; however, * then we'd have to mask off DEAD separately as DEAD may be * visible without ATOMIC if we race with percpu_ref_kill(). DEAD * implies ATOMIC anyway. Test them together. */ if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; return true; } /** * percpu_ref_get_many - increment a percpu refcount * @ref: percpu_ref to get * @nr: number of references to get * * Analogous to atomic_long_add(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_add(*percpu_count, nr); else atomic_long_add(nr, &ref->data->count); rcu_read_unlock(); } /** * percpu_ref_get - increment a percpu refcount * @ref: percpu_ref to get * * Analagous to atomic_long_inc(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get(struct percpu_ref *ref) { percpu_ref_get_many(ref, 1); } /** * percpu_ref_tryget_many - try to increment a percpu refcount * @ref: percpu_ref to try-get * @nr: number of references to get * * Increment a percpu refcount by @nr unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; bool ret; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_add(*percpu_count, nr); ret = true; } else { ret = atomic_long_add_unless(&ref->data->count, nr, 0); } rcu_read_unlock(); return ret; } /** * percpu_ref_tryget - try to increment a percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { return percpu_ref_tryget_many(ref, 1); } /** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless it has already been killed. Returns * %true on success; %false on failure. * * Completion of percpu_ref_kill() in itself doesn't guarantee that this * function will fail. For such guarantee, percpu_ref_kill_and_confirm() * should be used. After the confirm_kill callback is invoked, it's * guaranteed that no new reference will be given out by * percpu_ref_tryget_live(). * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; bool ret = false; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); ret = true; } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { ret = atomic_long_inc_not_zero(&ref->data->count); } rcu_read_unlock(); return ret; } /** * percpu_ref_put_many - decrement a percpu refcount * @ref: percpu_ref to put * @nr: number of references to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_sub(*percpu_count, nr); else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count))) ref->data->release(ref); rcu_read_unlock(); } /** * percpu_ref_put - decrement a percpu refcount * @ref: percpu_ref to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put(struct percpu_ref *ref) { percpu_ref_put_many(ref, 1); } /** * percpu_ref_is_dying - test whether a percpu refcount is dying or dead * @ref: percpu_ref to test * * Returns %true if @ref is dying or dead. * * This function is safe to call as long as @ref is between init and exit * and the caller is responsible for synchronizing against state changes. */ static inline bool percpu_ref_is_dying(struct percpu_ref *ref) { return ref->percpu_count_ptr & __PERCPU_REF_DEAD; } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PROCESSOR_H #define _ASM_X86_PROCESSOR_H #include <asm/processor-flags.h> /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct io_bitmap; struct vm86; #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> #include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> #include <asm/vdso/processor.h> #include <linux/personality.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/mem_encrypt.h> /* * We handle most unaligned accesses in hardware. On the other hand * unaligned DMA can be quite expensive on some Nehalem processors. * * Based on this we disable the IP header alignment in network drivers. */ #define NET_IP_ALIGN 0 #define HBP_NUM 4 /* * These alignment constraints are for performance in the vSMP case, * but in the task_struct case we must also meet hardware imposed * alignment requirements of the FPU state: */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else # define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif enum tlb_infos { ENTRIES, NR_INFO }; extern u16 __read_mostly tlb_lli_4k[NR_INFO]; extern u16 __read_mostly tlb_lli_2m[NR_INFO]; extern u16 __read_mostly tlb_lli_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_1g[NR_INFO]; /* * CPU type and hardware bug flags. Kept separately for each CPU. * Members of this structure are referenced in head_32.S, so think twice * before touching them. [mj] */ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif #ifdef CONFIG_X86_VMX_FEATURE_NAMES __u32 vmx_capability[NVMXINTS]; #endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; /* * Align to size of unsigned long because the x86_capability array * is passed to bitops which require the alignment. Use unnamed * union to enforce the array is aligned to size of unsigned long. */ union { __u32 x86_capability[NCAPINTS + NBUGINTS]; unsigned long x86_capability_alignment; }; char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; u16 initial_apicid; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; /* Logical processor id: */ u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; u16 cpu_die_id; u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; struct cpuid_regs { u32 eax, ebx, ecx, edx; }; enum cpuid_regs_idx { CPUID_EAX = 0, CPUID_EBX, CPUID_ECX, CPUID_EDX, }; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 #define X86_VENDOR_UMC 3 #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 #define X86_VENDOR_ZHAOXIN 10 #define X86_VENDOR_NUM 11 #define X86_VENDOR_UNKNOWN 0xff /* * capabilities of CPUs */ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #else #define cpu_info boot_cpu_data #define cpu_data(cpu) boot_cpu_data #endif extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); #ifdef CONFIG_X86_32 extern int have_cpuid_p(void); #else static inline int have_cpuid_p(void) { return 1; } #endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (*eax), "2" (*ecx) : "memory"); } #define native_cpuid_reg(reg) \ static inline unsigned int native_cpuid_##reg(unsigned int op) \ { \ unsigned int eax = op, ebx, ecx = 0, edx; \ \ native_cpuid(&eax, &ebx, &ecx, &edx); \ \ return reg; \ } /* * Native CPUID functions returning a single datum. */ native_cpuid_reg(eax) native_cpuid_reg(ebx) native_cpuid_reg(ecx) native_cpuid_reg(edx) /* * Friendlier CR3 helpers. */ static inline unsigned long read_cr3_pa(void) { return __read_cr3() & CR3_ADDR_MASK; } static inline unsigned long native_read_cr3_pa(void) { return __native_read_cr3() & CR3_ADDR_MASK; } static inline void load_cr3(pgd_t *pgdir) { write_cr3(__sme_pa(pgdir)); } /* * Note that while the legacy 'TSS' name comes from 'Task State Segment', * on modern x86 CPUs the TSS also holds information important to 64-bit mode, * unrelated to the task-switch mechanism: */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { unsigned short back_link, __blh; unsigned long sp0; unsigned short ss0, __ss0h; unsigned long sp1; /* * We don't use ring 1, so ss1 is a convenient scratch space in * the same cacheline as sp0. We use ss1 to cache the value in * MSR_IA32_SYSENTER_CS. When we context switch * MSR_IA32_SYSENTER_CS, we first check if the new value being * written matches ss1, and, if it's not, then we wrmsr the new * value and update ss1. * * The only reason we context switch MSR_IA32_SYSENTER_CS is * that we set it to zero in vm86 tasks to avoid corrupting the * stack if we were to go through the sysenter path from vm86 * mode. */ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ unsigned short __ss1h; unsigned long sp2; unsigned short ss2, __ss2h; unsigned long __cr3; unsigned long ip; unsigned long flags; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long bx; unsigned long sp; unsigned long bp; unsigned long si; unsigned long di; unsigned short es, __esh; unsigned short cs, __csh; unsigned short ss, __ssh; unsigned short ds, __dsh; unsigned short fs, __fsh; unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace; unsigned short io_bitmap_base; } __attribute__((packed)); #else struct x86_hw_tss { u32 reserved1; u64 sp0; /* * We store cpu_current_top_of_stack in sp1 so it's always accessible. * Linux does not use ring 1, so sp1 is not otherwise needed. */ u64 sp1; /* * Since Linux does not use ring 2, the 'sp2' slot is unused by * hardware. entry_SYSCALL_64 uses it as scratch space to stash * the user RSP value. */ u64 sp2; u64 reserved2; u64 ist[7]; u32 reserved3; u32 reserved4; u16 reserved5; u16 io_bitmap_base; } __attribute__((packed)); #endif /* * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) #define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) #define IO_BITMAP_OFFSET_VALID_ALL \ (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) #ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ # define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) #else # define __KERNEL_TSS_LIMIT \ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) #endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); /* * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { /* The sequence number of the last active bitmap. */ u64 prev_sequence; /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io * bitmap user will just set x86_tss.io_bitmap_base to a value * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ unsigned int prev_max; /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; /* * Special I/O bitmap to emulate IOPL(3). All bytes zero, * except the additional byte at the end. */ unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { /* * The fixed hardware portion. This must not cross a page boundary * at risk of violating the SDM's advice and potentially triggering * errata. */ struct x86_hw_tss x86_tss; struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else /* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif #ifdef CONFIG_X86_64 struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. */ char gs_base[40]; unsigned long stack_canary; }; DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); #else /* X86_64 */ #ifdef CONFIG_STACKPROTECTOR /* * Make sure stack canary segment base is cached-aligned: * "For Intel Atom processors, avoid non zero segment base address * that is not aligned to cache line boundary at all cost." * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) */ struct stack_canary { char __pad[20]; /* canary at %gs:20 */ unsigned long canary; }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif /* Per CPU softirq stack pointer */ DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; struct perf_event; struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #ifdef CONFIG_X86_32 unsigned long sp0; #endif unsigned long sp; #ifdef CONFIG_X86_32 unsigned long sysenter_cs; #else unsigned short es; unsigned short ds; unsigned short fsindex; unsigned short gsindex; #endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; #else /* * XXX: this could presumably be unsigned short. Alternatively, * 32-bit kernels could be taught to use fsindex instead. */ unsigned long fs; unsigned long gs; #endif /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; unsigned long trap_nr; unsigned long error_code; #ifdef CONFIG_VM86 /* Virtual 86 mode info */ struct vm86 *vm86; #endif /* IO permissions: */ struct io_bitmap *io_bitmap; /* * IOPL. Priviledge level dependent I/O permission which is * emulated via the I/O bitmap to prevent user space from disabling * interrupts. */ unsigned long iopl_emul; unsigned int sig_on_uaccess_err:1; /* Floating point and extended processor state */ struct fpu fpu; /* * WARNING: 'fpu' is dynamically-sized. It *MUST* be at * the end. */ }; /* Whitelist the FPU state from the task_struct for hardened usercopy. */ static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = offsetof(struct thread_struct, fpu.state); *size = fpu_kernel_xstate_size; } static inline void native_load_sp0(unsigned long sp0) { this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } static inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ return this_cpu_read_stable(cpu_current_top_of_stack); } static inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define __cpuid native_cpuid static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx * resulting in stale register contents being returned. */ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = 0; __cpuid(eax, ebx, ecx, edx); } /* Some CPUID calls want 'count' to be placed in ecx */ static inline void cpuid_count(unsigned int op, int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = count; __cpuid(eax, ebx, ecx, edx); } /* * CPUID functions returning a single datum */ static inline unsigned int cpuid_eax(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return eax; } static inline unsigned int cpuid_ebx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return ebx; } static inline unsigned int cpuid_ecx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return ecx; } static inline unsigned int cpuid_edx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return edx; } extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); extern int sysenter_setup(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); static inline unsigned long get_debugctlmsr(void) { unsigned long debugctlmsr = 0; #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) return 0; #endif rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); return debugctlmsr; } static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) return; #endif wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } extern void set_task_blockstep(struct task_struct *task, bool on); /* Boot loader type from the setup header: */ extern int bootloader_type; extern int bootloader_version; extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW #define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else # define BASE_PREFETCH "prefetcht0 %P1" #endif /* * Prefetch instructions for Pentium III (+) and AMD Athlon (+) * * It's not worth to care about 3dnow prefetches for the K6 * because they are microcoded there and very slow. */ static inline void prefetch(const void *x) { alternative_input(BASE_PREFETCH, "prefetchnta %P1", X86_FEATURE_XMM, "m" (*(const char *)x)); } /* * 3dnow prefetch to get an exclusive cache line. * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %P1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } static inline void spin_lock_prefetch(const void *x) { prefetchw(x); } #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) #define task_pt_regs(task) \ ({ \ unsigned long __ptr = (unsigned long)task_stack_page(task); \ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ ((struct pt_regs *)__ptr) - 1; \ }) #ifdef CONFIG_X86_32 #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ } #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else #define INIT_THREAD { } extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); /* * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); #else static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { uint32_t base, eax, signature[3]; for (base = 0x40000000; base < 0x40010000; base += 0x100) { cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); if (!memcmp(sig, signature, 12) && (leaves == 0 || ((eax - base) >= leaves))) return base; } return 0; } extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN bool xen_set_default_idle(void); #else #define xen_set_default_idle 0 #endif void stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, L1TF_MITIGATION_FULL, L1TF_MITIGATION_FULL_FORCE }; extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV, }; #endif /* _ASM_X86_PROCESSOR_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 // SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-fallback.sh // DO NOT MODIFY THIS FILE DIRECTLY #ifndef _LINUX_ATOMIC_FALLBACK_H #define _LINUX_ATOMIC_FALLBACK_H #include <linux/compiler.h> #ifndef arch_xchg_relaxed #define arch_xchg_relaxed arch_xchg #define arch_xchg_acquire arch_xchg #define arch_xchg_release arch_xchg #else /* arch_xchg_relaxed */ #ifndef arch_xchg_acquire #define arch_xchg_acquire(...) \ __atomic_op_acquire(arch_xchg, __VA_ARGS__) #endif #ifndef arch_xchg_release #define arch_xchg_release(...) \ __atomic_op_release(arch_xchg, __VA_ARGS__) #endif #ifndef arch_xchg #define arch_xchg(...) \ __atomic_op_fence(arch_xchg, __VA_ARGS__) #endif #endif /* arch_xchg_relaxed */ #ifndef arch_cmpxchg_relaxed #define arch_cmpxchg_relaxed arch_cmpxchg #define arch_cmpxchg_acquire arch_cmpxchg #define arch_cmpxchg_release arch_cmpxchg #else /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg_acquire #define arch_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_cmpxchg, __VA_ARGS__) #endif #ifndef arch_cmpxchg_release #define arch_cmpxchg_release(...) \ __atomic_op_release(arch_cmpxchg, __VA_ARGS__) #endif #ifndef arch_cmpxchg #define arch_cmpxchg(...) \ __atomic_op_fence(arch_cmpxchg, __VA_ARGS__) #endif #endif /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg64_relaxed #define arch_cmpxchg64_relaxed arch_cmpxchg64 #define arch_cmpxchg64_acquire arch_cmpxchg64 #define arch_cmpxchg64_release arch_cmpxchg64 #else /* arch_cmpxchg64_relaxed */ #ifndef arch_cmpxchg64_acquire #define arch_cmpxchg64_acquire(...) \ __atomic_op_acquire(arch_cmpxchg64, __VA_ARGS__) #endif #ifndef arch_cmpxchg64_release #define arch_cmpxchg64_release(...) \ __atomic_op_release(arch_cmpxchg64, __VA_ARGS__) #endif #ifndef arch_cmpxchg64 #define arch_cmpxchg64(...) \ __atomic_op_fence(arch_cmpxchg64, __VA_ARGS__) #endif #endif /* arch_cmpxchg64_relaxed */ #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { return smp_load_acquire(&(v)->counter); } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif #ifndef arch_atomic_set_release static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { smp_store_release(&(v)->counter, i); } #define arch_atomic_set_release arch_atomic_set_release #endif #ifndef arch_atomic_add_return_relaxed #define arch_atomic_add_return_acquire arch_atomic_add_return #define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return #else /* arch_atomic_add_return_relaxed */ #ifndef arch_atomic_add_return_acquire static __always_inline int arch_atomic_add_return_acquire(int i, atomic_t *v) { int ret = arch_atomic_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_add_return_acquire arch_atomic_add_return_acquire #endif #ifndef arch_atomic_add_return_release static __always_inline int arch_atomic_add_return_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_add_return_relaxed(i, v); } #define arch_atomic_add_return_release arch_atomic_add_return_release #endif #ifndef arch_atomic_add_return static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_add_return arch_atomic_add_return #endif #endif /* arch_atomic_add_return_relaxed */ #ifndef arch_atomic_fetch_add_relaxed #define arch_atomic_fetch_add_acquire arch_atomic_fetch_add #define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add #else /* arch_atomic_fetch_add_relaxed */ #ifndef arch_atomic_fetch_add_acquire static __always_inline int arch_atomic_fetch_add_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire #endif #ifndef arch_atomic_fetch_add_release static __always_inline int arch_atomic_fetch_add_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_add_relaxed(i, v); } #define arch_atomic_fetch_add_release arch_atomic_fetch_add_release #endif #ifndef arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_add arch_atomic_fetch_add #endif #endif /* arch_atomic_fetch_add_relaxed */ #ifndef arch_atomic_sub_return_relaxed #define arch_atomic_sub_return_acquire arch_atomic_sub_return #define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return #else /* arch_atomic_sub_return_relaxed */ #ifndef arch_atomic_sub_return_acquire static __always_inline int arch_atomic_sub_return_acquire(int i, atomic_t *v) { int ret = arch_atomic_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire #endif #ifndef arch_atomic_sub_return_release static __always_inline int arch_atomic_sub_return_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_sub_return_relaxed(i, v); } #define arch_atomic_sub_return_release arch_atomic_sub_return_release #endif #ifndef arch_atomic_sub_return static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_sub_return arch_atomic_sub_return #endif #endif /* arch_atomic_sub_return_relaxed */ #ifndef arch_atomic_fetch_sub_relaxed #define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub #define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub #else /* arch_atomic_fetch_sub_relaxed */ #ifndef arch_atomic_fetch_sub_acquire static __always_inline int arch_atomic_fetch_sub_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire #endif #ifndef arch_atomic_fetch_sub_release static __always_inline int arch_atomic_fetch_sub_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_sub_relaxed(i, v); } #define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release #endif #ifndef arch_atomic_fetch_sub static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_sub arch_atomic_fetch_sub #endif #endif /* arch_atomic_fetch_sub_relaxed */ #ifndef arch_atomic_inc static __always_inline void arch_atomic_inc(atomic_t *v) { arch_atomic_add(1, v); } #define arch_atomic_inc arch_atomic_inc #endif #ifndef arch_atomic_inc_return_relaxed #ifdef arch_atomic_inc_return #define arch_atomic_inc_return_acquire arch_atomic_inc_return #define arch_atomic_inc_return_release arch_atomic_inc_return #define arch_atomic_inc_return_relaxed arch_atomic_inc_return #endif /* arch_atomic_inc_return */ #ifndef arch_atomic_inc_return static __always_inline int arch_atomic_inc_return(atomic_t *v) { return arch_atomic_add_return(1, v); } #define arch_atomic_inc_return arch_atomic_inc_return #endif #ifndef arch_atomic_inc_return_acquire static __always_inline int arch_atomic_inc_return_acquire(atomic_t *v) { return arch_atomic_add_return_acquire(1, v); } #define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire #endif #ifndef arch_atomic_inc_return_release static __always_inline int arch_atomic_inc_return_release(atomic_t *v) { return arch_atomic_add_return_release(1, v); } #define arch_atomic_inc_return_release arch_atomic_inc_return_release #endif #ifndef arch_atomic_inc_return_relaxed static __always_inline int arch_atomic_inc_return_relaxed(atomic_t *v) { return arch_atomic_add_return_relaxed(1, v); } #define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed #endif #else /* arch_atomic_inc_return_relaxed */ #ifndef arch_atomic_inc_return_acquire static __always_inline int arch_atomic_inc_return_acquire(atomic_t *v) { int ret = arch_atomic_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire #endif #ifndef arch_atomic_inc_return_release static __always_inline int arch_atomic_inc_return_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_inc_return_relaxed(v); } #define arch_atomic_inc_return_release arch_atomic_inc_return_release #endif #ifndef arch_atomic_inc_return static __always_inline int arch_atomic_inc_return(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_inc_return arch_atomic_inc_return #endif #endif /* arch_atomic_inc_return_relaxed */ #ifndef arch_atomic_fetch_inc_relaxed #ifdef arch_atomic_fetch_inc #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc #define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc #endif /* arch_atomic_fetch_inc */ #ifndef arch_atomic_fetch_inc static __always_inline int arch_atomic_fetch_inc(atomic_t *v) { return arch_atomic_fetch_add(1, v); } #define arch_atomic_fetch_inc arch_atomic_fetch_inc #endif #ifndef arch_atomic_fetch_inc_acquire static __always_inline int arch_atomic_fetch_inc_acquire(atomic_t *v) { return arch_atomic_fetch_add_acquire(1, v); } #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire #endif #ifndef arch_atomic_fetch_inc_release static __always_inline int arch_atomic_fetch_inc_release(atomic_t *v) { return arch_atomic_fetch_add_release(1, v); } #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release #endif #ifndef arch_atomic_fetch_inc_relaxed static __always_inline int arch_atomic_fetch_inc_relaxed(atomic_t *v) { return arch_atomic_fetch_add_relaxed(1, v); } #define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc_relaxed #endif #else /* arch_atomic_fetch_inc_relaxed */ #ifndef arch_atomic_fetch_inc_acquire static __always_inline int arch_atomic_fetch_inc_acquire(atomic_t *v) { int ret = arch_atomic_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire #endif #ifndef arch_atomic_fetch_inc_release static __always_inline int arch_atomic_fetch_inc_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_inc_relaxed(v); } #define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release #endif #ifndef arch_atomic_fetch_inc static __always_inline int arch_atomic_fetch_inc(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_inc arch_atomic_fetch_inc #endif #endif /* arch_atomic_fetch_inc_relaxed */ #ifndef arch_atomic_dec static __always_inline void arch_atomic_dec(atomic_t *v) { arch_atomic_sub(1, v); } #define arch_atomic_dec arch_atomic_dec #endif #ifndef arch_atomic_dec_return_relaxed #ifdef arch_atomic_dec_return #define arch_atomic_dec_return_acquire arch_atomic_dec_return #define arch_atomic_dec_return_release arch_atomic_dec_return #define arch_atomic_dec_return_relaxed arch_atomic_dec_return #endif /* arch_atomic_dec_return */ #ifndef arch_atomic_dec_return static __always_inline int arch_atomic_dec_return(atomic_t *v) { return arch_atomic_sub_return(1, v); } #define arch_atomic_dec_return arch_atomic_dec_return #endif #ifndef arch_atomic_dec_return_acquire static __always_inline int arch_atomic_dec_return_acquire(atomic_t *v) { return arch_atomic_sub_return_acquire(1, v); } #define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire #endif #ifndef arch_atomic_dec_return_release static __always_inline int arch_atomic_dec_return_release(atomic_t *v) { return arch_atomic_sub_return_release(1, v); } #define arch_atomic_dec_return_release arch_atomic_dec_return_release #endif #ifndef arch_atomic_dec_return_relaxed static __always_inline int arch_atomic_dec_return_relaxed(atomic_t *v) { return arch_atomic_sub_return_relaxed(1, v); } #define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed #endif #else /* arch_atomic_dec_return_relaxed */ #ifndef arch_atomic_dec_return_acquire static __always_inline int arch_atomic_dec_return_acquire(atomic_t *v) { int ret = arch_atomic_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire #endif #ifndef arch_atomic_dec_return_release static __always_inline int arch_atomic_dec_return_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_dec_return_relaxed(v); } #define arch_atomic_dec_return_release arch_atomic_dec_return_release #endif #ifndef arch_atomic_dec_return static __always_inline int arch_atomic_dec_return(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_dec_return arch_atomic_dec_return #endif #endif /* arch_atomic_dec_return_relaxed */ #ifndef arch_atomic_fetch_dec_relaxed #ifdef arch_atomic_fetch_dec #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec #define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec #endif /* arch_atomic_fetch_dec */ #ifndef arch_atomic_fetch_dec static __always_inline int arch_atomic_fetch_dec(atomic_t *v) { return arch_atomic_fetch_sub(1, v); } #define arch_atomic_fetch_dec arch_atomic_fetch_dec #endif #ifndef arch_atomic_fetch_dec_acquire static __always_inline int arch_atomic_fetch_dec_acquire(atomic_t *v) { return arch_atomic_fetch_sub_acquire(1, v); } #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire #endif #ifndef arch_atomic_fetch_dec_release static __always_inline int arch_atomic_fetch_dec_release(atomic_t *v) { return arch_atomic_fetch_sub_release(1, v); } #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release #endif #ifndef arch_atomic_fetch_dec_relaxed static __always_inline int arch_atomic_fetch_dec_relaxed(atomic_t *v) { return arch_atomic_fetch_sub_relaxed(1, v); } #define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec_relaxed #endif #else /* arch_atomic_fetch_dec_relaxed */ #ifndef arch_atomic_fetch_dec_acquire static __always_inline int arch_atomic_fetch_dec_acquire(atomic_t *v) { int ret = arch_atomic_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire #endif #ifndef arch_atomic_fetch_dec_release static __always_inline int arch_atomic_fetch_dec_release(atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_dec_relaxed(v); } #define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release #endif #ifndef arch_atomic_fetch_dec static __always_inline int arch_atomic_fetch_dec(atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_dec arch_atomic_fetch_dec #endif #endif /* arch_atomic_fetch_dec_relaxed */ #ifndef arch_atomic_fetch_and_relaxed #define arch_atomic_fetch_and_acquire arch_atomic_fetch_and #define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and #else /* arch_atomic_fetch_and_relaxed */ #ifndef arch_atomic_fetch_and_acquire static __always_inline int arch_atomic_fetch_and_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire #endif #ifndef arch_atomic_fetch_and_release static __always_inline int arch_atomic_fetch_and_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_and_relaxed(i, v); } #define arch_atomic_fetch_and_release arch_atomic_fetch_and_release #endif #ifndef arch_atomic_fetch_and static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_and arch_atomic_fetch_and #endif #endif /* arch_atomic_fetch_and_relaxed */ #ifndef arch_atomic_andnot static __always_inline void arch_atomic_andnot(int i, atomic_t *v) { arch_atomic_and(~i, v); } #define arch_atomic_andnot arch_atomic_andnot #endif #ifndef arch_atomic_fetch_andnot_relaxed #ifdef arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot #define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot #endif /* arch_atomic_fetch_andnot */ #ifndef arch_atomic_fetch_andnot static __always_inline int arch_atomic_fetch_andnot(int i, atomic_t *v) { return arch_atomic_fetch_and(~i, v); } #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #endif #ifndef arch_atomic_fetch_andnot_acquire static __always_inline int arch_atomic_fetch_andnot_acquire(int i, atomic_t *v) { return arch_atomic_fetch_and_acquire(~i, v); } #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire #endif #ifndef arch_atomic_fetch_andnot_release static __always_inline int arch_atomic_fetch_andnot_release(int i, atomic_t *v) { return arch_atomic_fetch_and_release(~i, v); } #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release #endif #ifndef arch_atomic_fetch_andnot_relaxed static __always_inline int arch_atomic_fetch_andnot_relaxed(int i, atomic_t *v) { return arch_atomic_fetch_and_relaxed(~i, v); } #define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed #endif #else /* arch_atomic_fetch_andnot_relaxed */ #ifndef arch_atomic_fetch_andnot_acquire static __always_inline int arch_atomic_fetch_andnot_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire #endif #ifndef arch_atomic_fetch_andnot_release static __always_inline int arch_atomic_fetch_andnot_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_andnot_relaxed(i, v); } #define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release #endif #ifndef arch_atomic_fetch_andnot static __always_inline int arch_atomic_fetch_andnot(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #endif #endif /* arch_atomic_fetch_andnot_relaxed */ #ifndef arch_atomic_fetch_or_relaxed #define arch_atomic_fetch_or_acquire arch_atomic_fetch_or #define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or #else /* arch_atomic_fetch_or_relaxed */ #ifndef arch_atomic_fetch_or_acquire static __always_inline int arch_atomic_fetch_or_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire #endif #ifndef arch_atomic_fetch_or_release static __always_inline int arch_atomic_fetch_or_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_or_relaxed(i, v); } #define arch_atomic_fetch_or_release arch_atomic_fetch_or_release #endif #ifndef arch_atomic_fetch_or static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_or arch_atomic_fetch_or #endif #endif /* arch_atomic_fetch_or_relaxed */ #ifndef arch_atomic_fetch_xor_relaxed #define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor #else /* arch_atomic_fetch_xor_relaxed */ #ifndef arch_atomic_fetch_xor_acquire static __always_inline int arch_atomic_fetch_xor_acquire(int i, atomic_t *v) { int ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire #endif #ifndef arch_atomic_fetch_xor_release static __always_inline int arch_atomic_fetch_xor_release(int i, atomic_t *v) { __atomic_release_fence(); return arch_atomic_fetch_xor_relaxed(i, v); } #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release #endif #ifndef arch_atomic_fetch_xor static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #endif #endif /* arch_atomic_fetch_xor_relaxed */ #ifndef arch_atomic_xchg_relaxed #define arch_atomic_xchg_acquire arch_atomic_xchg #define arch_atomic_xchg_release arch_atomic_xchg #define arch_atomic_xchg_relaxed arch_atomic_xchg #else /* arch_atomic_xchg_relaxed */ #ifndef arch_atomic_xchg_acquire static __always_inline int arch_atomic_xchg_acquire(atomic_t *v, int i) { int ret = arch_atomic_xchg_relaxed(v, i); __atomic_acquire_fence(); return ret; } #define arch_atomic_xchg_acquire arch_atomic_xchg_acquire #endif #ifndef arch_atomic_xchg_release static __always_inline int arch_atomic_xchg_release(atomic_t *v, int i) { __atomic_release_fence(); return arch_atomic_xchg_relaxed(v, i); } #define arch_atomic_xchg_release arch_atomic_xchg_release #endif #ifndef arch_atomic_xchg static __always_inline int arch_atomic_xchg(atomic_t *v, int i) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_xchg_relaxed(v, i); __atomic_post_full_fence(); return ret; } #define arch_atomic_xchg arch_atomic_xchg #endif #endif /* arch_atomic_xchg_relaxed */ #ifndef arch_atomic_cmpxchg_relaxed #define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg #define arch_atomic_cmpxchg_release arch_atomic_cmpxchg #define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg #else /* arch_atomic_cmpxchg_relaxed */ #ifndef arch_atomic_cmpxchg_acquire static __always_inline int arch_atomic_cmpxchg_acquire(atomic_t *v, int old, int new) { int ret = arch_atomic_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire #endif #ifndef arch_atomic_cmpxchg_release static __always_inline int arch_atomic_cmpxchg_release(atomic_t *v, int old, int new) { __atomic_release_fence(); return arch_atomic_cmpxchg_relaxed(v, old, new); } #define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release #endif #ifndef arch_atomic_cmpxchg static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; __atomic_pre_full_fence(); ret = arch_atomic_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic_cmpxchg arch_atomic_cmpxchg #endif #endif /* arch_atomic_cmpxchg_relaxed */ #ifndef arch_atomic_try_cmpxchg_relaxed #ifdef arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg #define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg #endif /* arch_atomic_try_cmpxchg */ #ifndef arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg #endif #ifndef arch_atomic_try_cmpxchg_acquire static __always_inline bool arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire #endif #ifndef arch_atomic_try_cmpxchg_release static __always_inline bool arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release #endif #ifndef arch_atomic_try_cmpxchg_relaxed static __always_inline bool arch_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { int r, o = *old; r = arch_atomic_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg_relaxed #endif #else /* arch_atomic_try_cmpxchg_relaxed */ #ifndef arch_atomic_try_cmpxchg_acquire static __always_inline bool arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { bool ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire #endif #ifndef arch_atomic_try_cmpxchg_release static __always_inline bool arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { __atomic_release_fence(); return arch_atomic_try_cmpxchg_relaxed(v, old, new); } #define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release #endif #ifndef arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { bool ret; __atomic_pre_full_fence(); ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg #endif #endif /* arch_atomic_try_cmpxchg_relaxed */ #ifndef arch_atomic_sub_and_test /** * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return arch_atomic_sub_return(i, v) == 0; } #define arch_atomic_sub_and_test arch_atomic_sub_and_test #endif #ifndef arch_atomic_dec_and_test /** * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return arch_atomic_dec_return(v) == 0; } #define arch_atomic_dec_and_test arch_atomic_dec_and_test #endif #ifndef arch_atomic_inc_and_test /** * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return arch_atomic_inc_return(v) == 0; } #define arch_atomic_inc_and_test arch_atomic_inc_and_test #endif #ifndef arch_atomic_add_negative /** * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return arch_atomic_add_return(i, v) < 0; } #define arch_atomic_add_negative arch_atomic_add_negative #endif #ifndef arch_atomic_fetch_add_unless /** * arch_atomic_fetch_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c = arch_atomic_read(v); do { if (unlikely(c == u)) break; } while (!arch_atomic_try_cmpxchg(v, &c, c + a)); return c; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless #endif #ifndef arch_atomic_add_unless /** * arch_atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ static __always_inline bool arch_atomic_add_unless(atomic_t *v, int a, int u) { return arch_atomic_fetch_add_unless(v, a, u) != u; } #define arch_atomic_add_unless arch_atomic_add_unless #endif #ifndef arch_atomic_inc_not_zero /** * arch_atomic_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic_t * * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ static __always_inline bool arch_atomic_inc_not_zero(atomic_t *v) { return arch_atomic_add_unless(v, 1, 0); } #define arch_atomic_inc_not_zero arch_atomic_inc_not_zero #endif #ifndef arch_atomic_inc_unless_negative static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int c = arch_atomic_read(v); do { if (unlikely(c < 0)) return false; } while (!arch_atomic_try_cmpxchg(v, &c, c + 1)); return true; } #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative #endif #ifndef arch_atomic_dec_unless_positive static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) { int c = arch_atomic_read(v); do { if (unlikely(c > 0)) return false; } while (!arch_atomic_try_cmpxchg(v, &c, c - 1)); return true; } #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive #endif #ifndef arch_atomic_dec_if_positive static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) { int dec, c = arch_atomic_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!arch_atomic_try_cmpxchg(v, &c, dec)); return dec; } #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive #endif #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif #ifndef arch_atomic64_read_acquire static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { return smp_load_acquire(&(v)->counter); } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif #ifndef arch_atomic64_set_release static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { smp_store_release(&(v)->counter, i); } #define arch_atomic64_set_release arch_atomic64_set_release #endif #ifndef arch_atomic64_add_return_relaxed #define arch_atomic64_add_return_acquire arch_atomic64_add_return #define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return #else /* arch_atomic64_add_return_relaxed */ #ifndef arch_atomic64_add_return_acquire static __always_inline s64 arch_atomic64_add_return_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire #endif #ifndef arch_atomic64_add_return_release static __always_inline s64 arch_atomic64_add_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_add_return_relaxed(i, v); } #define arch_atomic64_add_return_release arch_atomic64_add_return_release #endif #ifndef arch_atomic64_add_return static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_add_return arch_atomic64_add_return #endif #endif /* arch_atomic64_add_return_relaxed */ #ifndef arch_atomic64_fetch_add_relaxed #define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add #define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add #else /* arch_atomic64_fetch_add_relaxed */ #ifndef arch_atomic64_fetch_add_acquire static __always_inline s64 arch_atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire #endif #ifndef arch_atomic64_fetch_add_release static __always_inline s64 arch_atomic64_fetch_add_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_add_relaxed(i, v); } #define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release #endif #ifndef arch_atomic64_fetch_add static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_add arch_atomic64_fetch_add #endif #endif /* arch_atomic64_fetch_add_relaxed */ #ifndef arch_atomic64_sub_return_relaxed #define arch_atomic64_sub_return_acquire arch_atomic64_sub_return #define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return #else /* arch_atomic64_sub_return_relaxed */ #ifndef arch_atomic64_sub_return_acquire static __always_inline s64 arch_atomic64_sub_return_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire #endif #ifndef arch_atomic64_sub_return_release static __always_inline s64 arch_atomic64_sub_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_sub_return_relaxed(i, v); } #define arch_atomic64_sub_return_release arch_atomic64_sub_return_release #endif #ifndef arch_atomic64_sub_return static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_sub_return arch_atomic64_sub_return #endif #endif /* arch_atomic64_sub_return_relaxed */ #ifndef arch_atomic64_fetch_sub_relaxed #define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub #else /* arch_atomic64_fetch_sub_relaxed */ #ifndef arch_atomic64_fetch_sub_acquire static __always_inline s64 arch_atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire #endif #ifndef arch_atomic64_fetch_sub_release static __always_inline s64 arch_atomic64_fetch_sub_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_sub_relaxed(i, v); } #define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release #endif #ifndef arch_atomic64_fetch_sub static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub #endif #endif /* arch_atomic64_fetch_sub_relaxed */ #ifndef arch_atomic64_inc static __always_inline void arch_atomic64_inc(atomic64_t *v) { arch_atomic64_add(1, v); } #define arch_atomic64_inc arch_atomic64_inc #endif #ifndef arch_atomic64_inc_return_relaxed #ifdef arch_atomic64_inc_return #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return #define arch_atomic64_inc_return_release arch_atomic64_inc_return #define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return #endif /* arch_atomic64_inc_return */ #ifndef arch_atomic64_inc_return static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { return arch_atomic64_add_return(1, v); } #define arch_atomic64_inc_return arch_atomic64_inc_return #endif #ifndef arch_atomic64_inc_return_acquire static __always_inline s64 arch_atomic64_inc_return_acquire(atomic64_t *v) { return arch_atomic64_add_return_acquire(1, v); } #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire #endif #ifndef arch_atomic64_inc_return_release static __always_inline s64 arch_atomic64_inc_return_release(atomic64_t *v) { return arch_atomic64_add_return_release(1, v); } #define arch_atomic64_inc_return_release arch_atomic64_inc_return_release #endif #ifndef arch_atomic64_inc_return_relaxed static __always_inline s64 arch_atomic64_inc_return_relaxed(atomic64_t *v) { return arch_atomic64_add_return_relaxed(1, v); } #define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed #endif #else /* arch_atomic64_inc_return_relaxed */ #ifndef arch_atomic64_inc_return_acquire static __always_inline s64 arch_atomic64_inc_return_acquire(atomic64_t *v) { s64 ret = arch_atomic64_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire #endif #ifndef arch_atomic64_inc_return_release static __always_inline s64 arch_atomic64_inc_return_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_inc_return_relaxed(v); } #define arch_atomic64_inc_return_release arch_atomic64_inc_return_release #endif #ifndef arch_atomic64_inc_return static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_inc_return arch_atomic64_inc_return #endif #endif /* arch_atomic64_inc_return_relaxed */ #ifndef arch_atomic64_fetch_inc_relaxed #ifdef arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc #define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc #endif /* arch_atomic64_fetch_inc */ #ifndef arch_atomic64_fetch_inc static __always_inline s64 arch_atomic64_fetch_inc(atomic64_t *v) { return arch_atomic64_fetch_add(1, v); } #define arch_atomic64_fetch_inc arch_atomic64_fetch_inc #endif #ifndef arch_atomic64_fetch_inc_acquire static __always_inline s64 arch_atomic64_fetch_inc_acquire(atomic64_t *v) { return arch_atomic64_fetch_add_acquire(1, v); } #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire #endif #ifndef arch_atomic64_fetch_inc_release static __always_inline s64 arch_atomic64_fetch_inc_release(atomic64_t *v) { return arch_atomic64_fetch_add_release(1, v); } #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release #endif #ifndef arch_atomic64_fetch_inc_relaxed static __always_inline s64 arch_atomic64_fetch_inc_relaxed(atomic64_t *v) { return arch_atomic64_fetch_add_relaxed(1, v); } #define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc_relaxed #endif #else /* arch_atomic64_fetch_inc_relaxed */ #ifndef arch_atomic64_fetch_inc_acquire static __always_inline s64 arch_atomic64_fetch_inc_acquire(atomic64_t *v) { s64 ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire #endif #ifndef arch_atomic64_fetch_inc_release static __always_inline s64 arch_atomic64_fetch_inc_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_inc_relaxed(v); } #define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release #endif #ifndef arch_atomic64_fetch_inc static __always_inline s64 arch_atomic64_fetch_inc(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_inc arch_atomic64_fetch_inc #endif #endif /* arch_atomic64_fetch_inc_relaxed */ #ifndef arch_atomic64_dec static __always_inline void arch_atomic64_dec(atomic64_t *v) { arch_atomic64_sub(1, v); } #define arch_atomic64_dec arch_atomic64_dec #endif #ifndef arch_atomic64_dec_return_relaxed #ifdef arch_atomic64_dec_return #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return #define arch_atomic64_dec_return_release arch_atomic64_dec_return #define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return #endif /* arch_atomic64_dec_return */ #ifndef arch_atomic64_dec_return static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { return arch_atomic64_sub_return(1, v); } #define arch_atomic64_dec_return arch_atomic64_dec_return #endif #ifndef arch_atomic64_dec_return_acquire static __always_inline s64 arch_atomic64_dec_return_acquire(atomic64_t *v) { return arch_atomic64_sub_return_acquire(1, v); } #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire #endif #ifndef arch_atomic64_dec_return_release static __always_inline s64 arch_atomic64_dec_return_release(atomic64_t *v) { return arch_atomic64_sub_return_release(1, v); } #define arch_atomic64_dec_return_release arch_atomic64_dec_return_release #endif #ifndef arch_atomic64_dec_return_relaxed static __always_inline s64 arch_atomic64_dec_return_relaxed(atomic64_t *v) { return arch_atomic64_sub_return_relaxed(1, v); } #define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed #endif #else /* arch_atomic64_dec_return_relaxed */ #ifndef arch_atomic64_dec_return_acquire static __always_inline s64 arch_atomic64_dec_return_acquire(atomic64_t *v) { s64 ret = arch_atomic64_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire #endif #ifndef arch_atomic64_dec_return_release static __always_inline s64 arch_atomic64_dec_return_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_dec_return_relaxed(v); } #define arch_atomic64_dec_return_release arch_atomic64_dec_return_release #endif #ifndef arch_atomic64_dec_return static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_dec_return arch_atomic64_dec_return #endif #endif /* arch_atomic64_dec_return_relaxed */ #ifndef arch_atomic64_fetch_dec_relaxed #ifdef arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec #define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec #endif /* arch_atomic64_fetch_dec */ #ifndef arch_atomic64_fetch_dec static __always_inline s64 arch_atomic64_fetch_dec(atomic64_t *v) { return arch_atomic64_fetch_sub(1, v); } #define arch_atomic64_fetch_dec arch_atomic64_fetch_dec #endif #ifndef arch_atomic64_fetch_dec_acquire static __always_inline s64 arch_atomic64_fetch_dec_acquire(atomic64_t *v) { return arch_atomic64_fetch_sub_acquire(1, v); } #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire #endif #ifndef arch_atomic64_fetch_dec_release static __always_inline s64 arch_atomic64_fetch_dec_release(atomic64_t *v) { return arch_atomic64_fetch_sub_release(1, v); } #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release #endif #ifndef arch_atomic64_fetch_dec_relaxed static __always_inline s64 arch_atomic64_fetch_dec_relaxed(atomic64_t *v) { return arch_atomic64_fetch_sub_relaxed(1, v); } #define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec_relaxed #endif #else /* arch_atomic64_fetch_dec_relaxed */ #ifndef arch_atomic64_fetch_dec_acquire static __always_inline s64 arch_atomic64_fetch_dec_acquire(atomic64_t *v) { s64 ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire #endif #ifndef arch_atomic64_fetch_dec_release static __always_inline s64 arch_atomic64_fetch_dec_release(atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_dec_relaxed(v); } #define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release #endif #ifndef arch_atomic64_fetch_dec static __always_inline s64 arch_atomic64_fetch_dec(atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_dec arch_atomic64_fetch_dec #endif #endif /* arch_atomic64_fetch_dec_relaxed */ #ifndef arch_atomic64_fetch_and_relaxed #define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and #define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and #else /* arch_atomic64_fetch_and_relaxed */ #ifndef arch_atomic64_fetch_and_acquire static __always_inline s64 arch_atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire #endif #ifndef arch_atomic64_fetch_and_release static __always_inline s64 arch_atomic64_fetch_and_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_and_relaxed(i, v); } #define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release #endif #ifndef arch_atomic64_fetch_and static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_and arch_atomic64_fetch_and #endif #endif /* arch_atomic64_fetch_and_relaxed */ #ifndef arch_atomic64_andnot static __always_inline void arch_atomic64_andnot(s64 i, atomic64_t *v) { arch_atomic64_and(~i, v); } #define arch_atomic64_andnot arch_atomic64_andnot #endif #ifndef arch_atomic64_fetch_andnot_relaxed #ifdef arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot #define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot #endif /* arch_atomic64_fetch_andnot */ #ifndef arch_atomic64_fetch_andnot static __always_inline s64 arch_atomic64_fetch_andnot(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and(~i, v); } #define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #endif #ifndef arch_atomic64_fetch_andnot_acquire static __always_inline s64 arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_acquire(~i, v); } #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire #endif #ifndef arch_atomic64_fetch_andnot_release static __always_inline s64 arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_release(~i, v); } #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release #endif #ifndef arch_atomic64_fetch_andnot_relaxed static __always_inline s64 arch_atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { return arch_atomic64_fetch_and_relaxed(~i, v); } #define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed #endif #else /* arch_atomic64_fetch_andnot_relaxed */ #ifndef arch_atomic64_fetch_andnot_acquire static __always_inline s64 arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire #endif #ifndef arch_atomic64_fetch_andnot_release static __always_inline s64 arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_andnot_relaxed(i, v); } #define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release #endif #ifndef arch_atomic64_fetch_andnot static __always_inline s64 arch_atomic64_fetch_andnot(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #endif #endif /* arch_atomic64_fetch_andnot_relaxed */ #ifndef arch_atomic64_fetch_or_relaxed #define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or #define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or #else /* arch_atomic64_fetch_or_relaxed */ #ifndef arch_atomic64_fetch_or_acquire static __always_inline s64 arch_atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire #endif #ifndef arch_atomic64_fetch_or_release static __always_inline s64 arch_atomic64_fetch_or_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_or_relaxed(i, v); } #define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release #endif #ifndef arch_atomic64_fetch_or static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_or arch_atomic64_fetch_or #endif #endif /* arch_atomic64_fetch_or_relaxed */ #ifndef arch_atomic64_fetch_xor_relaxed #define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor #else /* arch_atomic64_fetch_xor_relaxed */ #ifndef arch_atomic64_fetch_xor_acquire static __always_inline s64 arch_atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { s64 ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; } #define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire #endif #ifndef arch_atomic64_fetch_xor_release static __always_inline s64 arch_atomic64_fetch_xor_release(s64 i, atomic64_t *v) { __atomic_release_fence(); return arch_atomic64_fetch_xor_relaxed(i, v); } #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release #endif #ifndef arch_atomic64_fetch_xor static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif #endif /* arch_atomic64_fetch_xor_relaxed */ #ifndef arch_atomic64_xchg_relaxed #define arch_atomic64_xchg_acquire arch_atomic64_xchg #define arch_atomic64_xchg_release arch_atomic64_xchg #define arch_atomic64_xchg_relaxed arch_atomic64_xchg #else /* arch_atomic64_xchg_relaxed */ #ifndef arch_atomic64_xchg_acquire static __always_inline s64 arch_atomic64_xchg_acquire(atomic64_t *v, s64 i) { s64 ret = arch_atomic64_xchg_relaxed(v, i); __atomic_acquire_fence(); return ret; } #define arch_atomic64_xchg_acquire arch_atomic64_xchg_acquire #endif #ifndef arch_atomic64_xchg_release static __always_inline s64 arch_atomic64_xchg_release(atomic64_t *v, s64 i) { __atomic_release_fence(); return arch_atomic64_xchg_relaxed(v, i); } #define arch_atomic64_xchg_release arch_atomic64_xchg_release #endif #ifndef arch_atomic64_xchg static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 i) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_xchg_relaxed(v, i); __atomic_post_full_fence(); return ret; } #define arch_atomic64_xchg arch_atomic64_xchg #endif #endif /* arch_atomic64_xchg_relaxed */ #ifndef arch_atomic64_cmpxchg_relaxed #define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg #define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg #define arch_atomic64_cmpxchg_relaxed arch_atomic64_cmpxchg #else /* arch_atomic64_cmpxchg_relaxed */ #ifndef arch_atomic64_cmpxchg_acquire static __always_inline s64 arch_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { s64 ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg_acquire #endif #ifndef arch_atomic64_cmpxchg_release static __always_inline s64 arch_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { __atomic_release_fence(); return arch_atomic64_cmpxchg_relaxed(v, old, new); } #define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg_release #endif #ifndef arch_atomic64_cmpxchg static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg #endif #endif /* arch_atomic64_cmpxchg_relaxed */ #ifndef arch_atomic64_try_cmpxchg_relaxed #ifdef arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg #define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg #endif /* arch_atomic64_try_cmpxchg */ #ifndef arch_atomic64_try_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg #endif #ifndef arch_atomic64_try_cmpxchg_acquire static __always_inline bool arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire #endif #ifndef arch_atomic64_try_cmpxchg_release static __always_inline bool arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release #endif #ifndef arch_atomic64_try_cmpxchg_relaxed static __always_inline bool arch_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; r = arch_atomic64_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); } #define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg_relaxed #endif #else /* arch_atomic64_try_cmpxchg_relaxed */ #ifndef arch_atomic64_try_cmpxchg_acquire static __always_inline bool arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { bool ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; } #define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire #endif #ifndef arch_atomic64_try_cmpxchg_release static __always_inline bool arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { __atomic_release_fence(); return arch_atomic64_try_cmpxchg_relaxed(v, old, new); } #define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release #endif #ifndef arch_atomic64_try_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg #endif #endif /* arch_atomic64_try_cmpxchg_relaxed */ #ifndef arch_atomic64_sub_and_test /** * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic64_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return arch_atomic64_sub_return(i, v) == 0; } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test #endif #ifndef arch_atomic64_dec_and_test /** * arch_atomic64_dec_and_test - decrement and test * @v: pointer of type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return arch_atomic64_dec_return(v) == 0; } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test #endif #ifndef arch_atomic64_inc_and_test /** * arch_atomic64_inc_and_test - increment and test * @v: pointer of type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return arch_atomic64_inc_return(v) == 0; } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test #endif #ifndef arch_atomic64_add_negative /** * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic64_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return arch_atomic64_add_return(i, v) < 0; } #define arch_atomic64_add_negative arch_atomic64_add_negative #endif #ifndef arch_atomic64_fetch_add_unless /** * arch_atomic64_fetch_add_unless - add unless the number is already a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { s64 c = arch_atomic64_read(v); do { if (unlikely(c == u)) break; } while (!arch_atomic64_try_cmpxchg(v, &c, c + a)); return c; } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif #ifndef arch_atomic64_add_unless /** * arch_atomic64_add_unless - add unless the number is already a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ static __always_inline bool arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { return arch_atomic64_fetch_add_unless(v, a, u) != u; } #define arch_atomic64_add_unless arch_atomic64_add_unless #endif #ifndef arch_atomic64_inc_not_zero /** * arch_atomic64_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic64_t * * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ static __always_inline bool arch_atomic64_inc_not_zero(atomic64_t *v) { return arch_atomic64_add_unless(v, 1, 0); } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero #endif #ifndef arch_atomic64_inc_unless_negative static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) { s64 c = arch_atomic64_read(v); do { if (unlikely(c < 0)) return false; } while (!arch_atomic64_try_cmpxchg(v, &c, c + 1)); return true; } #define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative #endif #ifndef arch_atomic64_dec_unless_positive static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) { s64 c = arch_atomic64_read(v); do { if (unlikely(c > 0)) return false; } while (!arch_atomic64_try_cmpxchg(v, &c, c - 1)); return true; } #define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive #endif #ifndef arch_atomic64_dec_if_positive static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 dec, c = arch_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!arch_atomic64_try_cmpxchg(v, &c, dec)); return dec; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ // 90cd26cfd69d2250303d654955a0cc12620fb91b
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM libata #if !defined(_TRACE_LIBATA_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LIBATA_H #include <linux/ata.h> #include <linux/libata.h> #include <linux/tracepoint.h> #include <linux/trace_seq.h> #define ata_opcode_name(opcode) { opcode, #opcode } #define show_opcode_name(val) \ __print_symbolic(val, \ ata_opcode_name(ATA_CMD_DEV_RESET), \ ata_opcode_name(ATA_CMD_CHK_POWER), \ ata_opcode_name(ATA_CMD_STANDBY), \ ata_opcode_name(ATA_CMD_IDLE), \ ata_opcode_name(ATA_CMD_EDD), \ ata_opcode_name(ATA_CMD_DOWNLOAD_MICRO), \ ata_opcode_name(ATA_CMD_DOWNLOAD_MICRO_DMA), \ ata_opcode_name(ATA_CMD_NOP), \ ata_opcode_name(ATA_CMD_FLUSH), \ ata_opcode_name(ATA_CMD_FLUSH_EXT), \ ata_opcode_name(ATA_CMD_ID_ATA), \ ata_opcode_name(ATA_CMD_ID_ATAPI), \ ata_opcode_name(ATA_CMD_SERVICE), \ ata_opcode_name(ATA_CMD_READ), \ ata_opcode_name(ATA_CMD_READ_EXT), \ ata_opcode_name(ATA_CMD_READ_QUEUED), \ ata_opcode_name(ATA_CMD_READ_STREAM_EXT), \ ata_opcode_name(ATA_CMD_READ_STREAM_DMA_EXT), \ ata_opcode_name(ATA_CMD_WRITE), \ ata_opcode_name(ATA_CMD_WRITE_EXT), \ ata_opcode_name(ATA_CMD_WRITE_QUEUED), \ ata_opcode_name(ATA_CMD_WRITE_STREAM_EXT), \ ata_opcode_name(ATA_CMD_WRITE_STREAM_DMA_EXT), \ ata_opcode_name(ATA_CMD_WRITE_FUA_EXT), \ ata_opcode_name(ATA_CMD_WRITE_QUEUED_FUA_EXT), \ ata_opcode_name(ATA_CMD_FPDMA_READ), \ ata_opcode_name(ATA_CMD_FPDMA_WRITE), \ ata_opcode_name(ATA_CMD_NCQ_NON_DATA), \ ata_opcode_name(ATA_CMD_FPDMA_SEND), \ ata_opcode_name(ATA_CMD_FPDMA_RECV), \ ata_opcode_name(ATA_CMD_PIO_READ), \ ata_opcode_name(ATA_CMD_PIO_READ_EXT), \ ata_opcode_name(ATA_CMD_PIO_WRITE), \ ata_opcode_name(ATA_CMD_PIO_WRITE_EXT), \ ata_opcode_name(ATA_CMD_READ_MULTI), \ ata_opcode_name(ATA_CMD_READ_MULTI_EXT), \ ata_opcode_name(ATA_CMD_WRITE_MULTI), \ ata_opcode_name(ATA_CMD_WRITE_MULTI_EXT), \ ata_opcode_name(ATA_CMD_WRITE_MULTI_FUA_EXT), \ ata_opcode_name(ATA_CMD_SET_FEATURES), \ ata_opcode_name(ATA_CMD_SET_MULTI), \ ata_opcode_name(ATA_CMD_PACKET), \ ata_opcode_name(ATA_CMD_VERIFY), \ ata_opcode_name(ATA_CMD_VERIFY_EXT), \ ata_opcode_name(ATA_CMD_WRITE_UNCORR_EXT), \ ata_opcode_name(ATA_CMD_STANDBYNOW1), \ ata_opcode_name(ATA_CMD_IDLEIMMEDIATE), \ ata_opcode_name(ATA_CMD_SLEEP), \ ata_opcode_name(ATA_CMD_INIT_DEV_PARAMS), \ ata_opcode_name(ATA_CMD_READ_NATIVE_MAX), \ ata_opcode_name(ATA_CMD_READ_NATIVE_MAX_EXT), \ ata_opcode_name(ATA_CMD_SET_MAX), \ ata_opcode_name(ATA_CMD_SET_MAX_EXT), \ ata_opcode_name(ATA_CMD_READ_LOG_EXT), \ ata_opcode_name(ATA_CMD_WRITE_LOG_EXT), \ ata_opcode_name(ATA_CMD_READ_LOG_DMA_EXT), \ ata_opcode_name(ATA_CMD_WRITE_LOG_DMA_EXT), \ ata_opcode_name(ATA_CMD_TRUSTED_NONDATA), \ ata_opcode_name(ATA_CMD_TRUSTED_RCV), \ ata_opcode_name(ATA_CMD_TRUSTED_RCV_DMA), \ ata_opcode_name(ATA_CMD_TRUSTED_SND), \ ata_opcode_name(ATA_CMD_TRUSTED_SND_DMA), \ ata_opcode_name(ATA_CMD_PMP_READ), \ ata_opcode_name(ATA_CMD_PMP_READ_DMA), \ ata_opcode_name(ATA_CMD_PMP_WRITE), \ ata_opcode_name(ATA_CMD_PMP_WRITE_DMA), \ ata_opcode_name(ATA_CMD_CONF_OVERLAY), \ ata_opcode_name(ATA_CMD_SEC_SET_PASS), \ ata_opcode_name(ATA_CMD_SEC_UNLOCK), \ ata_opcode_name(ATA_CMD_SEC_ERASE_PREP), \ ata_opcode_name(ATA_CMD_SEC_ERASE_UNIT), \ ata_opcode_name(ATA_CMD_SEC_FREEZE_LOCK), \ ata_opcode_name(ATA_CMD_SEC_DISABLE_PASS), \ ata_opcode_name(ATA_CMD_CONFIG_STREAM), \ ata_opcode_name(ATA_CMD_SMART), \ ata_opcode_name(ATA_CMD_MEDIA_LOCK), \ ata_opcode_name(ATA_CMD_MEDIA_UNLOCK), \ ata_opcode_name(ATA_CMD_DSM), \ ata_opcode_name(ATA_CMD_CHK_MED_CRD_TYP), \ ata_opcode_name(ATA_CMD_CFA_REQ_EXT_ERR), \ ata_opcode_name(ATA_CMD_CFA_WRITE_NE), \ ata_opcode_name(ATA_CMD_CFA_TRANS_SECT), \ ata_opcode_name(ATA_CMD_CFA_ERASE), \ ata_opcode_name(ATA_CMD_CFA_WRITE_MULT_NE), \ ata_opcode_name(ATA_CMD_REQ_SENSE_DATA), \ ata_opcode_name(ATA_CMD_SANITIZE_DEVICE), \ ata_opcode_name(ATA_CMD_ZAC_MGMT_IN), \ ata_opcode_name(ATA_CMD_ZAC_MGMT_OUT), \ ata_opcode_name(ATA_CMD_RESTORE), \ ata_opcode_name(ATA_CMD_READ_LONG), \ ata_opcode_name(ATA_CMD_READ_LONG_ONCE), \ ata_opcode_name(ATA_CMD_WRITE_LONG), \ ata_opcode_name(ATA_CMD_WRITE_LONG_ONCE)) #define ata_error_name(result) { result, #result } #define show_error_name(val) \ __print_symbolic(val, \ ata_error_name(ATA_ICRC), \ ata_error_name(ATA_UNC), \ ata_error_name(ATA_MC), \ ata_error_name(ATA_IDNF), \ ata_error_name(ATA_MCR), \ ata_error_name(ATA_ABORTED), \ ata_error_name(ATA_TRK0NF), \ ata_error_name(ATA_AMNF)) #define ata_protocol_name(proto) { proto, #proto } #define show_protocol_name(val) \ __print_symbolic(val, \ ata_protocol_name(ATA_PROT_UNKNOWN), \ ata_protocol_name(ATA_PROT_NODATA), \ ata_protocol_name(ATA_PROT_PIO), \ ata_protocol_name(ATA_PROT_DMA), \ ata_protocol_name(ATA_PROT_NCQ), \ ata_protocol_name(ATA_PROT_NCQ_NODATA), \ ata_protocol_name(ATAPI_PROT_NODATA), \ ata_protocol_name(ATAPI_PROT_PIO), \ ata_protocol_name(ATAPI_PROT_DMA)) const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int); #define __parse_eh_action(a) libata_trace_parse_eh_action(p, a) const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int); #define __parse_eh_err_mask(m) libata_trace_parse_eh_err_mask(p, m) const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int); #define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f) const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) TRACE_EVENT(ata_qc_issue, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc), TP_STRUCT__entry( __field( unsigned int, ata_port ) __field( unsigned int, ata_dev ) __field( unsigned int, tag ) __field( unsigned char, cmd ) __field( unsigned char, dev ) __field( unsigned char, lbal ) __field( unsigned char, lbam ) __field( unsigned char, lbah ) __field( unsigned char, nsect ) __field( unsigned char, feature ) __field( unsigned char, hob_lbal ) __field( unsigned char, hob_lbam ) __field( unsigned char, hob_lbah ) __field( unsigned char, hob_nsect ) __field( unsigned char, hob_feature ) __field( unsigned char, ctl ) __field( unsigned char, proto ) __field( unsigned long, flags ) ), TP_fast_assign( __entry->ata_port = qc->ap->print_id; __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; __entry->tag = qc->tag; __entry->proto = qc->tf.protocol; __entry->cmd = qc->tf.command; __entry->dev = qc->tf.device; __entry->lbal = qc->tf.lbal; __entry->lbam = qc->tf.lbam; __entry->lbah = qc->tf.lbah; __entry->hob_lbal = qc->tf.hob_lbal; __entry->hob_lbam = qc->tf.hob_lbam; __entry->hob_lbah = qc->tf.hob_lbah; __entry->feature = qc->tf.feature; __entry->hob_feature = qc->tf.hob_feature; __entry->nsect = qc->tf.nsect; __entry->hob_nsect = qc->tf.hob_nsect; ), TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s cmd=%s%s " \ " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)", __entry->ata_port, __entry->ata_dev, __entry->tag, show_protocol_name(__entry->proto), show_opcode_name(__entry->cmd), __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect), __entry->cmd, __entry->feature, __entry->nsect, __entry->lbal, __entry->lbam, __entry->lbah, __entry->hob_feature, __entry->hob_nsect, __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah, __entry->dev) ); DECLARE_EVENT_CLASS(ata_qc_complete_template, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc), TP_STRUCT__entry( __field( unsigned int, ata_port ) __field( unsigned int, ata_dev ) __field( unsigned int, tag ) __field( unsigned char, status ) __field( unsigned char, dev ) __field( unsigned char, lbal ) __field( unsigned char, lbam ) __field( unsigned char, lbah ) __field( unsigned char, nsect ) __field( unsigned char, error ) __field( unsigned char, hob_lbal ) __field( unsigned char, hob_lbam ) __field( unsigned char, hob_lbah ) __field( unsigned char, hob_nsect ) __field( unsigned char, hob_feature ) __field( unsigned char, ctl ) __field( unsigned long, flags ) ), TP_fast_assign( __entry->ata_port = qc->ap->print_id; __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; __entry->tag = qc->tag; __entry->status = qc->result_tf.command; __entry->dev = qc->result_tf.device; __entry->lbal = qc->result_tf.lbal; __entry->lbam = qc->result_tf.lbam; __entry->lbah = qc->result_tf.lbah; __entry->hob_lbal = qc->result_tf.hob_lbal; __entry->hob_lbam = qc->result_tf.hob_lbam; __entry->hob_lbah = qc->result_tf.hob_lbah; __entry->error = qc->result_tf.feature; __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ " res=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)", __entry->ata_port, __entry->ata_dev, __entry->tag, __parse_qc_flags(__entry->flags), __parse_status(__entry->status), __entry->status, __entry->error, __entry->nsect, __entry->lbal, __entry->lbam, __entry->lbah, __entry->hob_feature, __entry->hob_nsect, __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah, __entry->dev) ); DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_internal, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_failed, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); TRACE_EVENT(ata_eh_link_autopsy, TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask), TP_ARGS(dev, eh_action, eh_err_mask), TP_STRUCT__entry( __field( unsigned int, ata_port ) __field( unsigned int, ata_dev ) __field( unsigned int, eh_action ) __field( unsigned int, eh_err_mask) ), TP_fast_assign( __entry->ata_port = dev->link->ap->print_id; __entry->ata_dev = dev->link->pmp + dev->devno; __entry->eh_action = eh_action; __entry->eh_err_mask = eh_err_mask; ), TP_printk("ata_port=%u ata_dev=%u eh_action=%s err_mask=%s", __entry->ata_port, __entry->ata_dev, __parse_eh_action(__entry->eh_action), __parse_eh_err_mask(__entry->eh_err_mask)) ); TRACE_EVENT(ata_eh_link_autopsy_qc, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc), TP_STRUCT__entry( __field( unsigned int, ata_port ) __field( unsigned int, ata_dev ) __field( unsigned int, tag ) __field( unsigned int, qc_flags ) __field( unsigned int, eh_err_mask) ), TP_fast_assign( __entry->ata_port = qc->ap->print_id; __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; __entry->tag = qc->tag; __entry->qc_flags = qc->flags; __entry->eh_err_mask = qc->err_mask; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s err_mask=%s", __entry->ata_port, __entry->ata_dev, __entry->tag, __parse_qc_flags(__entry->qc_flags), __parse_eh_err_mask(__entry->eh_err_mask)) ); #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 /* SPDX-License-Identifier: GPL-2.0 */ /* * Generic nexthop implementation * * Copyright (c) 2017-19 Cumulus Networks * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> */ #ifndef __LINUX_NEXTHOP_H #define __LINUX_NEXTHOP_H #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/route.h> #include <linux/types.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/netlink.h> #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK struct nexthop; struct nh_config { u32 nh_id; u8 nh_family; u8 nh_protocol; u8 nh_blackhole; u8 nh_fdb; u32 nh_flags; int nh_ifindex; struct net_device *dev; union { __be32 ipv4; struct in6_addr ipv6; } gw; struct nlattr *nh_grp; u16 nh_grp_type; struct nlattr *nh_encap; u16 nh_encap_type; u32 nlflags; struct nl_info nlinfo; }; struct nh_info { struct hlist_node dev_hash; /* entry on netns devhash */ struct nexthop *nh_parent; u8 family; bool reject_nh; bool fdb_nh; union { struct fib_nh_common fib_nhc; struct fib_nh fib_nh; struct fib6_nh fib6_nh; }; }; struct nh_grp_entry { struct nexthop *nh; u8 weight; atomic_t upper_bound; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ }; struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool mpath; bool fdb_nh; bool has_v4; struct nh_grp_entry nh_entries[]; }; struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; u32 id; u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; refcount_t refcnt; struct rcu_head rcu; union { struct nh_info __rcu *nh_info; struct nh_group __rcu *nh_grp; }; }; enum nexthop_event_type { NEXTHOP_EVENT_DEL }; int register_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); void nexthop_free_rcu(struct rcu_head *head); static inline bool nexthop_get(struct nexthop *nh) { return refcount_inc_not_zero(&nh->refcnt); } static inline void nexthop_put(struct nexthop *nh) { if (refcount_dec_and_test(&nh->refcnt)) call_rcu(&nh->rcu, nexthop_free_rcu); } static inline bool nexthop_cmp(const struct nexthop *nh1, const struct nexthop *nh2) { return nh1 == nh2; } static inline bool nexthop_is_fdb(const struct nexthop *nh) { if (nh->is_group) { const struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->fdb_nh; } else { const struct nh_info *nhi; nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->fdb_nh; } } static inline bool nexthop_has_v4(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->has_v4; } return false; } static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->mpath; } return false; } struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); static inline unsigned int nexthop_num_path(const struct nexthop *nh) { unsigned int rc = 1; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) rc = nh_grp->num_nh; } return rc; } static inline struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) { /* for_nexthops macros in fib_semantics.c grabs a pointer to * the nexthop before checking nhsel */ if (nhsel >= nhg->num_nh) return NULL; return nhg->nh_entries[nhsel].nh; } static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } return 0; } /* called with rcu lock */ static inline bool nexthop_is_blackhole(const struct nexthop *nh) { const struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->num_nh > 1) return false; nh = nh_grp->nh_entries[0].nh; } nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->reject_nh; } static inline void nexthop_path_fib_result(struct fib_result *res, int hash) { struct nh_info *nhi; struct nexthop *nh; nh = nexthop_select_path(res->fi->nh, hash); nhi = rcu_dereference(nh->nh_info); res->nhc = &nhi->fib_nhc; } /* called with rcu read lock or rtnl held */ static inline struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) { struct nh_info *nhi; BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->mpath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; } } nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } /* called from fib_table_lookup with rcu_lock */ static inline struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, int fib_flags, const struct flowi4 *flp, int *nhsel) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = i; return &nhi->fib_nhc; } } } else { nhi = rcu_dereference(nh->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = 0; return &nhi->fib_nhc; } } return NULL; } static inline bool nexthop_uses_dev(const struct nexthop *nh, const struct net_device *dev) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } } else { nhi = rcu_dereference(nh->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } return false; } static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) return nexthop_num_path(fi->nh); return fi->fib_nhs; } int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack); static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) { if (unlikely(fi->nh)) return nexthop_fib_nhc(fi->nh, nhsel); return &fi->fib_nh[nhsel].nh_common; } /* only used when fib_nh is built into fib_info */ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) { WARN_ON(fi->nh); return &fi->fib_nh[nhsel]; } /* * IPv6 variants */ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack); /* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } /* Variant of nexthop_fib6_nh(). * Caller should either hold rcu_read_lock_bh(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_bh_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; return fib6_nh->fib_nh_dev; } static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) { struct nexthop *nh = res->f6i->nh; struct nh_info *nhi; nh = nexthop_select_path(nh, hash); nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->reject_nh) { res->fib6_type = RTN_BLACKHOLE; res->fib6_flags |= RTF_REJECT; res->nh = nexthop_fib6_nh(nh); } else { res->nh = &nhi->fib6_nh; } } int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); static inline int nexthop_get_family(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->family; } static inline struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, int hash) { struct nh_info *nhi; struct nexthop *nhp; nhp = nexthop_select_path(nh, hash); if (unlikely(!nhp)) return NULL; nhi = rcu_dereference(nhp->nh_info); return &nhi->fib_nhc; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t